{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' WES exploration. ultimately want to append WES mutation info to our \\n    existing by-patient/sample validation table '"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\" WES exploration. ultimately want to append WES mutation info to our \n",
    "    existing by-patient/sample validation table \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 's3://darmanis-group/singlecell_lungadeno/non_immune/nonImmune_bams_9.27/bulk_vcf1/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the S3 directory listing to a local text file.\n",
    "# The interpolated values are quoted so the shell sees each one as a single word.\n",
    "out = 'out.txt'\n",
    "! aws s3 ls \"{path}\" > \"{out}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:16:56</td>\n",
       "      <td>50584627</td>\n",
       "      <td>TH041_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>23:51:43</td>\n",
       "      <td>70219393</td>\n",
       "      <td>TH067_E3_WB3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:52:02</td>\n",
       "      <td>48457973</td>\n",
       "      <td>TH067_E3_WB4.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:45:03</td>\n",
       "      <td>57620820</td>\n",
       "      <td>TH067_E4.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:49:23</td>\n",
       "      <td>78150367</td>\n",
       "      <td>TH067_E7.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:14:19</td>\n",
       "      <td>55255499</td>\n",
       "      <td>TH107_E2_WB2.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:31:39</td>\n",
       "      <td>67160476</td>\n",
       "      <td>TH116_E2.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:49:13</td>\n",
       "      <td>47331380</td>\n",
       "      <td>TH116_E4_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>19:53:10</td>\n",
       "      <td>47791771</td>\n",
       "      <td>TH146_E7_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:39:46</td>\n",
       "      <td>46342972</td>\n",
       "      <td>TH150_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:08:31</td>\n",
       "      <td>65418674</td>\n",
       "      <td>TH153_E6_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:52:44</td>\n",
       "      <td>48805723</td>\n",
       "      <td>TH155_E3_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>19:49:30</td>\n",
       "      <td>63588132</td>\n",
       "      <td>TH155_E5.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2019-03-13</td>\n",
       "      <td>00:24:40</td>\n",
       "      <td>91443292</td>\n",
       "      <td>TH156_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:41:05</td>\n",
       "      <td>46654253</td>\n",
       "      <td>TH158_E2_WB3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:02:42</td>\n",
       "      <td>64552991</td>\n",
       "      <td>TH169_E2.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:15:05</td>\n",
       "      <td>55956825</td>\n",
       "      <td>TH169_E4.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:32:34</td>\n",
       "      <td>44999830</td>\n",
       "      <td>TH169_E6_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:02:12</td>\n",
       "      <td>48182897</td>\n",
       "      <td>TH171_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:08:34</td>\n",
       "      <td>63289996</td>\n",
       "      <td>TH171_E3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:44:34</td>\n",
       "      <td>44972910</td>\n",
       "      <td>TH172_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:04:55</td>\n",
       "      <td>60293416</td>\n",
       "      <td>TH172_E3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:17:54</td>\n",
       "      <td>94059978</td>\n",
       "      <td>TH174_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:39:25</td>\n",
       "      <td>37100920</td>\n",
       "      <td>TH178_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>19:34:24</td>\n",
       "      <td>58641302</td>\n",
       "      <td>TH179_E1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>17:59:28</td>\n",
       "      <td>49556001</td>\n",
       "      <td>TH179_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:16:55</td>\n",
       "      <td>51053837</td>\n",
       "      <td>TH183_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:21:21</td>\n",
       "      <td>53131314</td>\n",
       "      <td>TH187_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:38:37</td>\n",
       "      <td>91807918</td>\n",
       "      <td>TH187_E3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:18:25</td>\n",
       "      <td>71441804</td>\n",
       "      <td>TH199_E3_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:40:51</td>\n",
       "      <td>53837825</td>\n",
       "      <td>TH205_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:43:56</td>\n",
       "      <td>67140192</td>\n",
       "      <td>TH205_E2.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:15:26</td>\n",
       "      <td>51989909</td>\n",
       "      <td>TH208_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:29:56</td>\n",
       "      <td>62918994</td>\n",
       "      <td>TH208_E3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>22:01:01</td>\n",
       "      <td>71390756</td>\n",
       "      <td>TH208_E4.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:44:57</td>\n",
       "      <td>90080483</td>\n",
       "      <td>TH210_E1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:45:05</td>\n",
       "      <td>57445713</td>\n",
       "      <td>TH210_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:33:26</td>\n",
       "      <td>82027727</td>\n",
       "      <td>TH217_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:51:29</td>\n",
       "      <td>57275298</td>\n",
       "      <td>TH218_E3_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:06:50</td>\n",
       "      <td>51587239</td>\n",
       "      <td>TH220_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:06:54</td>\n",
       "      <td>71215627</td>\n",
       "      <td>TH220_E2.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>22:32:18</td>\n",
       "      <td>65386503</td>\n",
       "      <td>TH222_E1_CBC1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:23:36</td>\n",
       "      <td>71688537</td>\n",
       "      <td>TH226_E3.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:07:32</td>\n",
       "      <td>50866128</td>\n",
       "      <td>TH226_E3_WB4.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>19:44:04</td>\n",
       "      <td>66771263</td>\n",
       "      <td>TH227_E2_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>22:27:41</td>\n",
       "      <td>67118701</td>\n",
       "      <td>TH231_E1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:11:26</td>\n",
       "      <td>50463063</td>\n",
       "      <td>TH231_E4_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>21:17:27</td>\n",
       "      <td>71610954</td>\n",
       "      <td>TH236_E4_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>22:11:03</td>\n",
       "      <td>78227346</td>\n",
       "      <td>TH238_E1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>18:32:00</td>\n",
       "      <td>54210632</td>\n",
       "      <td>TH238_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>22:16:56</td>\n",
       "      <td>86938604</td>\n",
       "      <td>TH248_E3_WB1.vcf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>2019-03-12</td>\n",
       "      <td>20:17:07</td>\n",
       "      <td>91778103</td>\n",
       "      <td>TH266_E1_WB1.vcf</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date      time      size               name\n",
       "0   2019-03-12  18:16:56  50584627   TH041_E1_WB1.vcf\n",
       "1   2019-03-12  23:51:43  70219393   TH067_E3_WB3.vcf\n",
       "2   2019-03-12  17:52:02  48457973   TH067_E3_WB4.vcf\n",
       "3   2019-03-12  21:45:03  57620820       TH067_E4.vcf\n",
       "4   2019-03-12  21:49:23  78150367       TH067_E7.vcf\n",
       "5   2019-03-12  18:14:19  55255499   TH107_E2_WB2.vcf\n",
       "6   2019-03-12  20:31:39  67160476       TH116_E2.vcf\n",
       "7   2019-03-12  17:49:13  47331380   TH116_E4_WB1.vcf\n",
       "8   2019-03-12  19:53:10  47791771   TH146_E7_WB1.vcf\n",
       "9   2019-03-12  17:39:46  46342972   TH150_E1_WB1.vcf\n",
       "10  2019-03-12  20:08:31  65418674   TH153_E6_WB1.vcf\n",
       "11  2019-03-12  17:52:44  48805723   TH155_E3_WB1.vcf\n",
       "12  2019-03-12  19:49:30  63588132       TH155_E5.vcf\n",
       "13  2019-03-13  00:24:40  91443292   TH156_E2_WB1.vcf\n",
       "14  2019-03-12  17:41:05  46654253   TH158_E2_WB3.vcf\n",
       "15  2019-03-12  20:02:42  64552991       TH169_E2.vcf\n",
       "16  2019-03-12  20:15:05  55956825       TH169_E4.vcf\n",
       "17  2019-03-12  17:32:34  44999830   TH169_E6_WB1.vcf\n",
       "18  2019-03-12  18:02:12  48182897   TH171_E1_WB1.vcf\n",
       "19  2019-03-12  20:08:34  63289996       TH171_E3.vcf\n",
       "20  2019-03-12  17:44:34  44972910   TH172_E2_WB1.vcf\n",
       "21  2019-03-12  20:04:55  60293416       TH172_E3.vcf\n",
       "22  2019-03-12  21:17:54  94059978   TH174_E1_WB1.vcf\n",
       "23  2019-03-12  18:39:25  37100920   TH178_E2_WB1.vcf\n",
       "24  2019-03-12  19:34:24  58641302       TH179_E1.vcf\n",
       "25  2019-03-12  17:59:28  49556001   TH179_E1_WB1.vcf\n",
       "26  2019-03-12  18:16:55  51053837   TH183_E2_WB1.vcf\n",
       "27  2019-03-12  18:21:21  53131314   TH187_E2_WB1.vcf\n",
       "28  2019-03-12  21:38:37  91807918       TH187_E3.vcf\n",
       "29  2019-03-12  21:18:25  71441804   TH199_E3_WB1.vcf\n",
       "30  2019-03-12  18:40:51  53837825   TH205_E1_WB1.vcf\n",
       "31  2019-03-12  20:43:56  67140192       TH205_E2.vcf\n",
       "32  2019-03-12  18:15:26  51989909   TH208_E2_WB1.vcf\n",
       "33  2019-03-12  20:29:56  62918994       TH208_E3.vcf\n",
       "34  2019-03-12  22:01:01  71390756       TH208_E4.vcf\n",
       "35  2019-03-12  21:44:57  90080483       TH210_E1.vcf\n",
       "36  2019-03-12  18:45:05  57445713   TH210_E2_WB1.vcf\n",
       "37  2019-03-12  21:33:26  82027727   TH217_E1_WB1.vcf\n",
       "38  2019-03-12  18:51:29  57275298   TH218_E3_WB1.vcf\n",
       "39  2019-03-12  20:06:50  51587239   TH220_E1_WB1.vcf\n",
       "40  2019-03-12  21:06:54  71215627       TH220_E2.vcf\n",
       "41  2019-03-12  22:32:18  65386503  TH222_E1_CBC1.vcf\n",
       "42  2019-03-12  20:23:36  71688537       TH226_E3.vcf\n",
       "43  2019-03-12  18:07:32  50866128   TH226_E3_WB4.vcf\n",
       "44  2019-03-12  19:44:04  66771263   TH227_E2_WB1.vcf\n",
       "45  2019-03-12  22:27:41  67118701       TH231_E1.vcf\n",
       "46  2019-03-12  18:11:26  50463063   TH231_E4_WB1.vcf\n",
       "47  2019-03-12  21:17:27  71610954   TH236_E4_WB1.vcf\n",
       "48  2019-03-12  22:11:03  78227346       TH238_E1.vcf\n",
       "49  2019-03-12  18:32:00  54210632   TH238_E1_WB1.vcf\n",
       "50  2019-03-12  22:16:56  86938604   TH248_E3_WB1.vcf\n",
       "51  2019-03-12  20:17:07  91778103   TH266_E1_WB1.vcf"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the saved `aws s3 ls` listing; columns are whitespace-separated.\n",
    "# sep=r'\\s+' is the documented equivalent of delim_whitespace=True, which is\n",
    "# deprecated in recent pandas. Read from `out` so the path is defined in one place.\n",
    "bulk_vcf_dir = pd.read_csv(\n",
    "    out,\n",
    "    sep=r'\\s+',\n",
    "    names=['date', 'time', 'size', 'name'],\n",
    ")\n",
    "bulk_vcf_dir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['TH041_E1_WB1.vcf',\n",
       " 'TH067_E3_WB3.vcf',\n",
       " 'TH067_E3_WB4.vcf',\n",
       " 'TH067_E4.vcf',\n",
       " 'TH067_E7.vcf',\n",
       " 'TH107_E2_WB2.vcf',\n",
       " 'TH116_E2.vcf',\n",
       " 'TH116_E4_WB1.vcf',\n",
       " 'TH146_E7_WB1.vcf',\n",
       " 'TH150_E1_WB1.vcf',\n",
       " 'TH153_E6_WB1.vcf',\n",
       " 'TH155_E3_WB1.vcf',\n",
       " 'TH155_E5.vcf',\n",
       " 'TH156_E2_WB1.vcf',\n",
       " 'TH158_E2_WB3.vcf',\n",
       " 'TH169_E2.vcf',\n",
       " 'TH169_E4.vcf',\n",
       " 'TH169_E6_WB1.vcf',\n",
       " 'TH171_E1_WB1.vcf',\n",
       " 'TH171_E3.vcf',\n",
       " 'TH172_E2_WB1.vcf',\n",
       " 'TH172_E3.vcf',\n",
       " 'TH174_E1_WB1.vcf',\n",
       " 'TH178_E2_WB1.vcf',\n",
       " 'TH179_E1.vcf',\n",
       " 'TH179_E1_WB1.vcf',\n",
       " 'TH183_E2_WB1.vcf',\n",
       " 'TH187_E2_WB1.vcf',\n",
       " 'TH187_E3.vcf',\n",
       " 'TH199_E3_WB1.vcf',\n",
       " 'TH205_E1_WB1.vcf',\n",
       " 'TH205_E2.vcf',\n",
       " 'TH208_E2_WB1.vcf',\n",
       " 'TH208_E3.vcf',\n",
       " 'TH208_E4.vcf',\n",
       " 'TH210_E1.vcf',\n",
       " 'TH210_E2_WB1.vcf',\n",
       " 'TH217_E1_WB1.vcf',\n",
       " 'TH218_E3_WB1.vcf',\n",
       " 'TH220_E1_WB1.vcf',\n",
       " 'TH220_E2.vcf',\n",
       " 'TH222_E1_CBC1.vcf',\n",
       " 'TH226_E3.vcf',\n",
       " 'TH226_E3_WB4.vcf',\n",
       " 'TH227_E2_WB1.vcf',\n",
       " 'TH231_E1.vcf',\n",
       " 'TH231_E4_WB1.vcf',\n",
       " 'TH236_E4_WB1.vcf',\n",
       " 'TH238_E1.vcf',\n",
       " 'TH238_E1_WB1.vcf',\n",
       " 'TH248_E3_WB1.vcf',\n",
       " 'TH266_E1_WB1.vcf']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pull the 'name' column out of the listing as a plain Python list.\n",
    "file_names = bulk_vcf_dir['name'].tolist()\n",
    "file_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['TH041_E1_WB1',\n",
       " 'TH067_E3_WB3',\n",
       " 'TH067_E3_WB4',\n",
       " 'TH067_E4',\n",
       " 'TH067_E7',\n",
       " 'TH107_E2_WB2',\n",
       " 'TH116_E2',\n",
       " 'TH116_E4_WB1',\n",
       " 'TH146_E7_WB1',\n",
       " 'TH150_E1_WB1',\n",
       " 'TH153_E6_WB1',\n",
       " 'TH155_E3_WB1',\n",
       " 'TH155_E5',\n",
       " 'TH156_E2_WB1',\n",
       " 'TH158_E2_WB3',\n",
       " 'TH169_E2',\n",
       " 'TH169_E4',\n",
       " 'TH169_E6_WB1',\n",
       " 'TH171_E1_WB1',\n",
       " 'TH171_E3',\n",
       " 'TH172_E2_WB1',\n",
       " 'TH172_E3',\n",
       " 'TH174_E1_WB1',\n",
       " 'TH178_E2_WB1',\n",
       " 'TH179_E1',\n",
       " 'TH179_E1_WB1',\n",
       " 'TH183_E2_WB1',\n",
       " 'TH187_E2_WB1',\n",
       " 'TH187_E3',\n",
       " 'TH199_E3_WB1',\n",
       " 'TH205_E1_WB1',\n",
       " 'TH205_E2',\n",
       " 'TH208_E2_WB1',\n",
       " 'TH208_E3',\n",
       " 'TH208_E4',\n",
       " 'TH210_E1',\n",
       " 'TH210_E2_WB1',\n",
       " 'TH217_E1_WB1',\n",
       " 'TH218_E3_WB1',\n",
       " 'TH220_E1_WB1',\n",
       " 'TH220_E2',\n",
       " 'TH222_E1_CBC1',\n",
       " 'TH226_E3',\n",
       " 'TH226_E3_WB4',\n",
       " 'TH227_E2_WB1',\n",
       " 'TH231_E1',\n",
       " 'TH231_E4_WB1',\n",
       " 'TH236_E4_WB1',\n",
       " 'TH238_E1',\n",
       " 'TH238_E1_WB1',\n",
       " 'TH248_E3_WB1',\n",
       " 'TH266_E1_WB1']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop the '.vcf' extension to get sample names.\n",
    "# NOTE: str.strip('.vcf') would remove any of the characters '.', 'v', 'c', 'f'\n",
    "# from BOTH ends of the string, so only a literal trailing '.vcf' is removed here.\n",
    "vcf_ext = '.vcf'\n",
    "sample_names = [\n",
    "    x[:-len(vcf_ext)] if x.endswith(vcf_ext) else x\n",
    "    for x in file_names\n",
    "]\n",
    "sample_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33\n"
     ]
    }
   ],
   "source": [
    "# Patient ID is the part of each sample name before the first underscore.\n",
    "patients_w_WES = [name.partition('_')[0] for name in sample_names]\n",
    "# Collapse to unique patient IDs and report how many there are.\n",
    "patients_w_WES_u = {*patients_w_WES}\n",
    "print(len(patients_w_WES_u))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {},
   "outputs": [],
   "source": [
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#///////////    how many patients do we have WES for?     /////////////////////\n",
    "#//////////////////////////////////////////////////////////////////////////////"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>nGene</th>\n",
       "      <th>nReads</th>\n",
       "      <th>orig.ident</th>\n",
       "      <th>well</th>\n",
       "      <th>plate</th>\n",
       "      <th>cell_id</th>\n",
       "      <th>sample_name</th>\n",
       "      <th>patient_id</th>\n",
       "      <th>DOB</th>\n",
       "      <th>...</th>\n",
       "      <th>res.0.1</th>\n",
       "      <th>res.0.3</th>\n",
       "      <th>res.0.5</th>\n",
       "      <th>res.0.7</th>\n",
       "      <th>res.0.9</th>\n",
       "      <th>S.Score</th>\n",
       "      <th>G2M.Score</th>\n",
       "      <th>Phase</th>\n",
       "      <th>main_seurat_cluster</th>\n",
       "      <th>immune_annotation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A10_1001000329</td>\n",
       "      <td>2969</td>\n",
       "      <td>594781</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>1001000329</td>\n",
       "      <td>A10_1001000329</td>\n",
       "      <td>LT_S07</td>\n",
       "      <td>TH103</td>\n",
       "      <td>1971-07-27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>-0.133188</td>\n",
       "      <td>-0.421943</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A10_1001000407</td>\n",
       "      <td>2265</td>\n",
       "      <td>662644</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>1001000407</td>\n",
       "      <td>A10_1001000407</td>\n",
       "      <td>LT_S21</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>-0.185666</td>\n",
       "      <td>-0.063043</td>\n",
       "      <td>G1</td>\n",
       "      <td>3</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A10_1001000408</td>\n",
       "      <td>7085</td>\n",
       "      <td>602263</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>1001000408</td>\n",
       "      <td>A10_1001000408</td>\n",
       "      <td>LT_S21</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>-0.180289</td>\n",
       "      <td>-0.535851</td>\n",
       "      <td>G1</td>\n",
       "      <td>8</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A10_1001000410</td>\n",
       "      <td>1914</td>\n",
       "      <td>185720</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>1001000410</td>\n",
       "      <td>A10_1001000410</td>\n",
       "      <td>LT_S21</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>8</td>\n",
       "      <td>-0.005365</td>\n",
       "      <td>-0.231767</td>\n",
       "      <td>G1</td>\n",
       "      <td>10</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A10_1001000412</td>\n",
       "      <td>7274</td>\n",
       "      <td>914254</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>1001000412</td>\n",
       "      <td>A10_1001000412</td>\n",
       "      <td>LT_S21</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>-0.377089</td>\n",
       "      <td>-0.492793</td>\n",
       "      <td>G1</td>\n",
       "      <td>8</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>A10_B000420</td>\n",
       "      <td>2464</td>\n",
       "      <td>101565</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B000420</td>\n",
       "      <td>A10_B000420</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>-0.200227</td>\n",
       "      <td>0.030644</td>\n",
       "      <td>G2M</td>\n",
       "      <td>13</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>A10_B000422</td>\n",
       "      <td>3424</td>\n",
       "      <td>748360</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B000422</td>\n",
       "      <td>A10_B000422</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.141943</td>\n",
       "      <td>0.025751</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>A10_B000580</td>\n",
       "      <td>3984</td>\n",
       "      <td>3184663</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B000580</td>\n",
       "      <td>A10_B000580</td>\n",
       "      <td>LT_S50</td>\n",
       "      <td>TH225</td>\n",
       "      <td>1940-06-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.117926</td>\n",
       "      <td>-0.303089</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>A10_B000863</td>\n",
       "      <td>1093</td>\n",
       "      <td>391731</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B000863</td>\n",
       "      <td>A10_B000863</td>\n",
       "      <td>LT_S47</td>\n",
       "      <td>TH220</td>\n",
       "      <td>1946-09-18</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>-0.124229</td>\n",
       "      <td>-0.203540</td>\n",
       "      <td>G1</td>\n",
       "      <td>3</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>A10_B001007</td>\n",
       "      <td>717</td>\n",
       "      <td>399863</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001007</td>\n",
       "      <td>A10_B001007</td>\n",
       "      <td>LT_S82</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>10</td>\n",
       "      <td>15</td>\n",
       "      <td>19</td>\n",
       "      <td>19</td>\n",
       "      <td>22</td>\n",
       "      <td>0.151958</td>\n",
       "      <td>-0.111904</td>\n",
       "      <td>S</td>\n",
       "      <td>19</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>A10_B001008</td>\n",
       "      <td>3483</td>\n",
       "      <td>993763</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001008</td>\n",
       "      <td>A10_B001008</td>\n",
       "      <td>LT_S82</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.257014</td>\n",
       "      <td>-0.217378</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>A10_B001010</td>\n",
       "      <td>1911</td>\n",
       "      <td>730050</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001010</td>\n",
       "      <td>A10_B001010</td>\n",
       "      <td>LT_S82</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.004001</td>\n",
       "      <td>0.422330</td>\n",
       "      <td>G2M</td>\n",
       "      <td>1</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>A10_B001470</td>\n",
       "      <td>2454</td>\n",
       "      <td>141655</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001470</td>\n",
       "      <td>A10_B001470</td>\n",
       "      <td>LT_S74</td>\n",
       "      <td>TH248</td>\n",
       "      <td>1965-05-04</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>19</td>\n",
       "      <td>-0.126570</td>\n",
       "      <td>-0.392595</td>\n",
       "      <td>G1</td>\n",
       "      <td>6</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>A10_B001474</td>\n",
       "      <td>1999</td>\n",
       "      <td>165042</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001474</td>\n",
       "      <td>A10_B001474</td>\n",
       "      <td>LT_S74</td>\n",
       "      <td>TH248</td>\n",
       "      <td>1965-05-04</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>16</td>\n",
       "      <td>14</td>\n",
       "      <td>0.037971</td>\n",
       "      <td>-0.312922</td>\n",
       "      <td>S</td>\n",
       "      <td>14</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>A10_B001545</td>\n",
       "      <td>4933</td>\n",
       "      <td>732697</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001545</td>\n",
       "      <td>A10_B001545</td>\n",
       "      <td>LT_S79</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>-0.246892</td>\n",
       "      <td>-0.270221</td>\n",
       "      <td>G1</td>\n",
       "      <td>3</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>A10_B001548</td>\n",
       "      <td>3745</td>\n",
       "      <td>1913208</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001548</td>\n",
       "      <td>A10_B001548</td>\n",
       "      <td>LT_S79</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>0.077639</td>\n",
       "      <td>-0.227272</td>\n",
       "      <td>S</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>A10_B001551</td>\n",
       "      <td>1334</td>\n",
       "      <td>89066</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001551</td>\n",
       "      <td>A10_B001551</td>\n",
       "      <td>LT_S80</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "      <td>-0.214978</td>\n",
       "      <td>-0.307657</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>A10_B001554</td>\n",
       "      <td>2554</td>\n",
       "      <td>2223066</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001554</td>\n",
       "      <td>A10_B001554</td>\n",
       "      <td>LT_S78</td>\n",
       "      <td>TH179_NAT</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>-0.026382</td>\n",
       "      <td>-0.139074</td>\n",
       "      <td>G1</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>A10_B001556</td>\n",
       "      <td>2696</td>\n",
       "      <td>108922</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001556</td>\n",
       "      <td>A10_B001556</td>\n",
       "      <td>LT_S80</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>0.042820</td>\n",
       "      <td>-0.131778</td>\n",
       "      <td>S</td>\n",
       "      <td>4</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>A10_B001557</td>\n",
       "      <td>3016</td>\n",
       "      <td>160550</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001557</td>\n",
       "      <td>A10_B001557</td>\n",
       "      <td>LT_S80</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>0.026602</td>\n",
       "      <td>-0.399065</td>\n",
       "      <td>S</td>\n",
       "      <td>4</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>A10_B001558</td>\n",
       "      <td>1820</td>\n",
       "      <td>1164138</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001558</td>\n",
       "      <td>A10_B001558</td>\n",
       "      <td>LT_S78</td>\n",
       "      <td>TH179_NAT</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>-0.077484</td>\n",
       "      <td>-0.260795</td>\n",
       "      <td>G1</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>A10_B001559</td>\n",
       "      <td>3438</td>\n",
       "      <td>263528</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001559</td>\n",
       "      <td>A10_B001559</td>\n",
       "      <td>LT_S80</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>-0.143188</td>\n",
       "      <td>-0.072381</td>\n",
       "      <td>G1</td>\n",
       "      <td>13</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>A10_B001608</td>\n",
       "      <td>1756</td>\n",
       "      <td>216449</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B001608</td>\n",
       "      <td>A10_B001608</td>\n",
       "      <td>LT_S28</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>8</td>\n",
       "      <td>0.159249</td>\n",
       "      <td>0.144014</td>\n",
       "      <td>S</td>\n",
       "      <td>10</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>A10_B002073</td>\n",
       "      <td>1060</td>\n",
       "      <td>119872</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002073</td>\n",
       "      <td>A10_B002073</td>\n",
       "      <td>LT_S65</td>\n",
       "      <td>TH238_NAT</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "      <td>-0.064271</td>\n",
       "      <td>-0.201847</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>A10_B002074</td>\n",
       "      <td>3203</td>\n",
       "      <td>502442</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002074</td>\n",
       "      <td>A10_B002074</td>\n",
       "      <td>LT_S65</td>\n",
       "      <td>TH238_NAT</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.321349</td>\n",
       "      <td>-0.193131</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>A10_B002077</td>\n",
       "      <td>1340</td>\n",
       "      <td>198774</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002077</td>\n",
       "      <td>A10_B002077</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>7</td>\n",
       "      <td>0.020655</td>\n",
       "      <td>0.030858</td>\n",
       "      <td>G2M</td>\n",
       "      <td>1</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>A10_B002078</td>\n",
       "      <td>1944</td>\n",
       "      <td>785772</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002078</td>\n",
       "      <td>A10_B002078</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-0.263965</td>\n",
       "      <td>-0.162814</td>\n",
       "      <td>G1</td>\n",
       "      <td>1</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>A10_B002079</td>\n",
       "      <td>3008</td>\n",
       "      <td>543848</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002079</td>\n",
       "      <td>A10_B002079</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>16</td>\n",
       "      <td>-0.080785</td>\n",
       "      <td>-0.078821</td>\n",
       "      <td>G1</td>\n",
       "      <td>16</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>A10_B002572</td>\n",
       "      <td>6268</td>\n",
       "      <td>1212934</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B002572</td>\n",
       "      <td>A10_B002572</td>\n",
       "      <td>LT_S81</td>\n",
       "      <td>TH266</td>\n",
       "      <td>1978-09-20</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>-0.371432</td>\n",
       "      <td>-0.427129</td>\n",
       "      <td>G1</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>A10_B003048</td>\n",
       "      <td>1731</td>\n",
       "      <td>51919</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>A10</td>\n",
       "      <td>B003048</td>\n",
       "      <td>A10_B003048</td>\n",
       "      <td>LT_S23</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>8</td>\n",
       "      <td>0.140251</td>\n",
       "      <td>-0.038638</td>\n",
       "      <td>S</td>\n",
       "      <td>10</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21379</th>\n",
       "      <td>P9_B001608</td>\n",
       "      <td>751</td>\n",
       "      <td>152200</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B001608</td>\n",
       "      <td>P9_B001608</td>\n",
       "      <td>LT_S28</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>9</td>\n",
       "      <td>7</td>\n",
       "      <td>0.095965</td>\n",
       "      <td>-0.234211</td>\n",
       "      <td>S</td>\n",
       "      <td>15</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21380</th>\n",
       "      <td>P9_B002077</td>\n",
       "      <td>1152</td>\n",
       "      <td>180650</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B002077</td>\n",
       "      <td>P9_B002077</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>16</td>\n",
       "      <td>-0.198534</td>\n",
       "      <td>-0.031337</td>\n",
       "      <td>G1</td>\n",
       "      <td>16</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21381</th>\n",
       "      <td>P9_B002079</td>\n",
       "      <td>2140</td>\n",
       "      <td>268841</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B002079</td>\n",
       "      <td>P9_B002079</td>\n",
       "      <td>LT_S66</td>\n",
       "      <td>TH238</td>\n",
       "      <td>1949-08-25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>-0.220345</td>\n",
       "      <td>-0.245248</td>\n",
       "      <td>G1</td>\n",
       "      <td>13</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21382</th>\n",
       "      <td>P9_B002573</td>\n",
       "      <td>3223</td>\n",
       "      <td>1497817</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B002573</td>\n",
       "      <td>P9_B002573</td>\n",
       "      <td>LT_S81</td>\n",
       "      <td>TH266</td>\n",
       "      <td>1978-09-20</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>16</td>\n",
       "      <td>14</td>\n",
       "      <td>-0.026880</td>\n",
       "      <td>-0.063479</td>\n",
       "      <td>G1</td>\n",
       "      <td>14</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21383</th>\n",
       "      <td>P9_B003048</td>\n",
       "      <td>1555</td>\n",
       "      <td>146047</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003048</td>\n",
       "      <td>P9_B003048</td>\n",
       "      <td>LT_S23</td>\n",
       "      <td>TH185</td>\n",
       "      <td>1961-12-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>8</td>\n",
       "      <td>0.109896</td>\n",
       "      <td>-0.146143</td>\n",
       "      <td>S</td>\n",
       "      <td>10</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21384</th>\n",
       "      <td>P9_B003067</td>\n",
       "      <td>5160</td>\n",
       "      <td>7901455</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003067</td>\n",
       "      <td>P9_B003067</td>\n",
       "      <td>LT_S34</td>\n",
       "      <td>TH205</td>\n",
       "      <td>1964-05-24</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0.048504</td>\n",
       "      <td>-0.108903</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21385</th>\n",
       "      <td>P9_B003070</td>\n",
       "      <td>754</td>\n",
       "      <td>104624</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003070</td>\n",
       "      <td>P9_B003070</td>\n",
       "      <td>LT_S34</td>\n",
       "      <td>TH205</td>\n",
       "      <td>1964-05-24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "      <td>-0.160681</td>\n",
       "      <td>0.014570</td>\n",
       "      <td>G2M</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21386</th>\n",
       "      <td>P9_B003071</td>\n",
       "      <td>1445</td>\n",
       "      <td>752933</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003071</td>\n",
       "      <td>P9_B003071</td>\n",
       "      <td>LT_S34</td>\n",
       "      <td>TH205</td>\n",
       "      <td>1964-05-24</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>-0.073801</td>\n",
       "      <td>-0.246584</td>\n",
       "      <td>G1</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21387</th>\n",
       "      <td>P9_B003105</td>\n",
       "      <td>535</td>\n",
       "      <td>187972</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003105</td>\n",
       "      <td>P9_B003105</td>\n",
       "      <td>LT_S72</td>\n",
       "      <td>TH222</td>\n",
       "      <td>1959-03-28</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>-0.089607</td>\n",
       "      <td>-0.093771</td>\n",
       "      <td>G1</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21388</th>\n",
       "      <td>P9_B003115</td>\n",
       "      <td>1118</td>\n",
       "      <td>1757656</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003115</td>\n",
       "      <td>P9_B003115</td>\n",
       "      <td>LT_S55</td>\n",
       "      <td>TH218</td>\n",
       "      <td>1958-11-20</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>-0.058187</td>\n",
       "      <td>-0.002288</td>\n",
       "      <td>G1</td>\n",
       "      <td>4</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21389</th>\n",
       "      <td>P9_B003125</td>\n",
       "      <td>1222</td>\n",
       "      <td>622796</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003125</td>\n",
       "      <td>P9_B003125</td>\n",
       "      <td>LT_S57</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>-0.196344</td>\n",
       "      <td>0.007299</td>\n",
       "      <td>G2M</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21390</th>\n",
       "      <td>P9_B003126</td>\n",
       "      <td>2803</td>\n",
       "      <td>1860222</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003126</td>\n",
       "      <td>P9_B003126</td>\n",
       "      <td>LT_S57</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>-0.267222</td>\n",
       "      <td>-0.069372</td>\n",
       "      <td>G1</td>\n",
       "      <td>7</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21391</th>\n",
       "      <td>P9_B003129</td>\n",
       "      <td>950</td>\n",
       "      <td>222677</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003129</td>\n",
       "      <td>P9_B003129</td>\n",
       "      <td>LT_S57</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>-0.039355</td>\n",
       "      <td>-0.042824</td>\n",
       "      <td>G1</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21392</th>\n",
       "      <td>P9_B003132</td>\n",
       "      <td>749</td>\n",
       "      <td>228975</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003132</td>\n",
       "      <td>P9_B003132</td>\n",
       "      <td>LT_S57</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>0.043213</td>\n",
       "      <td>-0.125724</td>\n",
       "      <td>S</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21393</th>\n",
       "      <td>P9_B003187</td>\n",
       "      <td>1805</td>\n",
       "      <td>636367</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003187</td>\n",
       "      <td>P9_B003187</td>\n",
       "      <td>LT_S57</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>-0.224999</td>\n",
       "      <td>0.011019</td>\n",
       "      <td>G2M</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21394</th>\n",
       "      <td>P9_B003518</td>\n",
       "      <td>4232</td>\n",
       "      <td>566057</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003518</td>\n",
       "      <td>P9_B003518</td>\n",
       "      <td>LT_S63</td>\n",
       "      <td>TH171</td>\n",
       "      <td>1976-08-20</td>\n",
       "      <td>...</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>-0.494101</td>\n",
       "      <td>-0.249648</td>\n",
       "      <td>G1</td>\n",
       "      <td>11</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21395</th>\n",
       "      <td>P9_B003527</td>\n",
       "      <td>4539</td>\n",
       "      <td>2776906</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003527</td>\n",
       "      <td>P9_B003527</td>\n",
       "      <td>LT_S63</td>\n",
       "      <td>TH171</td>\n",
       "      <td>1976-08-20</td>\n",
       "      <td>...</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>-0.121439</td>\n",
       "      <td>-0.245387</td>\n",
       "      <td>G1</td>\n",
       "      <td>11</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21396</th>\n",
       "      <td>P9_B003528</td>\n",
       "      <td>1348</td>\n",
       "      <td>1044025</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003528</td>\n",
       "      <td>P9_B003528</td>\n",
       "      <td>LT_S58</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>16</td>\n",
       "      <td>14</td>\n",
       "      <td>-0.060097</td>\n",
       "      <td>-0.235275</td>\n",
       "      <td>G1</td>\n",
       "      <td>14</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21397</th>\n",
       "      <td>P9_B003529</td>\n",
       "      <td>1769</td>\n",
       "      <td>857686</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003529</td>\n",
       "      <td>P9_B003529</td>\n",
       "      <td>LT_S63</td>\n",
       "      <td>TH171</td>\n",
       "      <td>1976-08-20</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>-0.070309</td>\n",
       "      <td>-0.146753</td>\n",
       "      <td>G1</td>\n",
       "      <td>4</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21398</th>\n",
       "      <td>P9_B003577</td>\n",
       "      <td>5361</td>\n",
       "      <td>1045655</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003577</td>\n",
       "      <td>P9_B003577</td>\n",
       "      <td>LT_S58</td>\n",
       "      <td>TH179</td>\n",
       "      <td>1955-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>18</td>\n",
       "      <td>0.210836</td>\n",
       "      <td>-0.109527</td>\n",
       "      <td>S</td>\n",
       "      <td>6</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21399</th>\n",
       "      <td>P9_B003643</td>\n",
       "      <td>568</td>\n",
       "      <td>165170</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003643</td>\n",
       "      <td>P9_B003643</td>\n",
       "      <td>LT_S49</td>\n",
       "      <td>TH223</td>\n",
       "      <td>1963-09-02</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>0.029372</td>\n",
       "      <td>0.138250</td>\n",
       "      <td>G2M</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21400</th>\n",
       "      <td>P9_B003646</td>\n",
       "      <td>1879</td>\n",
       "      <td>540627</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003646</td>\n",
       "      <td>P9_B003646</td>\n",
       "      <td>LT_S52</td>\n",
       "      <td>TH226</td>\n",
       "      <td>1954-12-26</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.154259</td>\n",
       "      <td>0.015758</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21401</th>\n",
       "      <td>P9_B003648</td>\n",
       "      <td>3024</td>\n",
       "      <td>577268</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003648</td>\n",
       "      <td>P9_B003648</td>\n",
       "      <td>LT_S53</td>\n",
       "      <td>TH067</td>\n",
       "      <td>1949-09-29</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.212149</td>\n",
       "      <td>-0.130153</td>\n",
       "      <td>G1</td>\n",
       "      <td>0</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21402</th>\n",
       "      <td>P9_B003656</td>\n",
       "      <td>526</td>\n",
       "      <td>86190</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003656</td>\n",
       "      <td>P9_B003656</td>\n",
       "      <td>LT_S51</td>\n",
       "      <td>TH227</td>\n",
       "      <td>1941-06-18</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>18</td>\n",
       "      <td>-0.092567</td>\n",
       "      <td>0.040308</td>\n",
       "      <td>G2M</td>\n",
       "      <td>6</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21403</th>\n",
       "      <td>P9_B003657</td>\n",
       "      <td>542</td>\n",
       "      <td>101539</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003657</td>\n",
       "      <td>P9_B003657</td>\n",
       "      <td>LT_S49</td>\n",
       "      <td>TH223</td>\n",
       "      <td>1963-09-02</td>\n",
       "      <td>...</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>-0.094790</td>\n",
       "      <td>-0.147624</td>\n",
       "      <td>G1</td>\n",
       "      <td>12</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21404</th>\n",
       "      <td>P9_B003658</td>\n",
       "      <td>3765</td>\n",
       "      <td>227391</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003658</td>\n",
       "      <td>P9_B003658</td>\n",
       "      <td>LT_S50</td>\n",
       "      <td>TH225</td>\n",
       "      <td>1940-06-25</td>\n",
       "      <td>...</td>\n",
       "      <td>10</td>\n",
       "      <td>17</td>\n",
       "      <td>22</td>\n",
       "      <td>23</td>\n",
       "      <td>25</td>\n",
       "      <td>0.027822</td>\n",
       "      <td>-0.213943</td>\n",
       "      <td>S</td>\n",
       "      <td>22</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21405</th>\n",
       "      <td>P9_B003771</td>\n",
       "      <td>2347</td>\n",
       "      <td>765194</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003771</td>\n",
       "      <td>P9_B003771</td>\n",
       "      <td>LT_S71</td>\n",
       "      <td>TH236</td>\n",
       "      <td>1959-07-01</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>-0.210142</td>\n",
       "      <td>-0.260048</td>\n",
       "      <td>G1</td>\n",
       "      <td>4</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21406</th>\n",
       "      <td>P9_B003894</td>\n",
       "      <td>2269</td>\n",
       "      <td>1561743</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003894</td>\n",
       "      <td>P9_B003894</td>\n",
       "      <td>LT_S71</td>\n",
       "      <td>TH236</td>\n",
       "      <td>1959-07-01</td>\n",
       "      <td>...</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>16</td>\n",
       "      <td>-0.110873</td>\n",
       "      <td>-0.176722</td>\n",
       "      <td>G1</td>\n",
       "      <td>16</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21407</th>\n",
       "      <td>P9_B003920</td>\n",
       "      <td>2794</td>\n",
       "      <td>2060395</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003920</td>\n",
       "      <td>P9_B003920</td>\n",
       "      <td>LT_S71</td>\n",
       "      <td>TH236</td>\n",
       "      <td>1959-07-01</td>\n",
       "      <td>...</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>16</td>\n",
       "      <td>-0.182744</td>\n",
       "      <td>-0.259483</td>\n",
       "      <td>G1</td>\n",
       "      <td>16</td>\n",
       "      <td>immune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21408</th>\n",
       "      <td>P9_B003995</td>\n",
       "      <td>2260</td>\n",
       "      <td>1260744</td>\n",
       "      <td>SeuratProject</td>\n",
       "      <td>P9</td>\n",
       "      <td>B003995</td>\n",
       "      <td>P9_B003995</td>\n",
       "      <td>LT_S71</td>\n",
       "      <td>TH236</td>\n",
       "      <td>1959-07-01</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>0.113005</td>\n",
       "      <td>0.044369</td>\n",
       "      <td>S</td>\n",
       "      <td>5</td>\n",
       "      <td>non-immune</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>21409 rows × 58 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           Unnamed: 0  nGene   nReads     orig.ident well       plate  \\\n",
       "0      A10_1001000329   2969   594781  SeuratProject  A10  1001000329   \n",
       "1      A10_1001000407   2265   662644  SeuratProject  A10  1001000407   \n",
       "2      A10_1001000408   7085   602263  SeuratProject  A10  1001000408   \n",
       "3      A10_1001000410   1914   185720  SeuratProject  A10  1001000410   \n",
       "4      A10_1001000412   7274   914254  SeuratProject  A10  1001000412   \n",
       "5         A10_B000420   2464   101565  SeuratProject  A10     B000420   \n",
       "6         A10_B000422   3424   748360  SeuratProject  A10     B000422   \n",
       "7         A10_B000580   3984  3184663  SeuratProject  A10     B000580   \n",
       "8         A10_B000863   1093   391731  SeuratProject  A10     B000863   \n",
       "9         A10_B001007    717   399863  SeuratProject  A10     B001007   \n",
       "10        A10_B001008   3483   993763  SeuratProject  A10     B001008   \n",
       "11        A10_B001010   1911   730050  SeuratProject  A10     B001010   \n",
       "12        A10_B001470   2454   141655  SeuratProject  A10     B001470   \n",
       "13        A10_B001474   1999   165042  SeuratProject  A10     B001474   \n",
       "14        A10_B001545   4933   732697  SeuratProject  A10     B001545   \n",
       "15        A10_B001548   3745  1913208  SeuratProject  A10     B001548   \n",
       "16        A10_B001551   1334    89066  SeuratProject  A10     B001551   \n",
       "17        A10_B001554   2554  2223066  SeuratProject  A10     B001554   \n",
       "18        A10_B001556   2696   108922  SeuratProject  A10     B001556   \n",
       "19        A10_B001557   3016   160550  SeuratProject  A10     B001557   \n",
       "20        A10_B001558   1820  1164138  SeuratProject  A10     B001558   \n",
       "21        A10_B001559   3438   263528  SeuratProject  A10     B001559   \n",
       "22        A10_B001608   1756   216449  SeuratProject  A10     B001608   \n",
       "23        A10_B002073   1060   119872  SeuratProject  A10     B002073   \n",
       "24        A10_B002074   3203   502442  SeuratProject  A10     B002074   \n",
       "25        A10_B002077   1340   198774  SeuratProject  A10     B002077   \n",
       "26        A10_B002078   1944   785772  SeuratProject  A10     B002078   \n",
       "27        A10_B002079   3008   543848  SeuratProject  A10     B002079   \n",
       "28        A10_B002572   6268  1212934  SeuratProject  A10     B002572   \n",
       "29        A10_B003048   1731    51919  SeuratProject  A10     B003048   \n",
       "...               ...    ...      ...            ...  ...         ...   \n",
       "21379      P9_B001608    751   152200  SeuratProject   P9     B001608   \n",
       "21380      P9_B002077   1152   180650  SeuratProject   P9     B002077   \n",
       "21381      P9_B002079   2140   268841  SeuratProject   P9     B002079   \n",
       "21382      P9_B002573   3223  1497817  SeuratProject   P9     B002573   \n",
       "21383      P9_B003048   1555   146047  SeuratProject   P9     B003048   \n",
       "21384      P9_B003067   5160  7901455  SeuratProject   P9     B003067   \n",
       "21385      P9_B003070    754   104624  SeuratProject   P9     B003070   \n",
       "21386      P9_B003071   1445   752933  SeuratProject   P9     B003071   \n",
       "21387      P9_B003105    535   187972  SeuratProject   P9     B003105   \n",
       "21388      P9_B003115   1118  1757656  SeuratProject   P9     B003115   \n",
       "21389      P9_B003125   1222   622796  SeuratProject   P9     B003125   \n",
       "21390      P9_B003126   2803  1860222  SeuratProject   P9     B003126   \n",
       "21391      P9_B003129    950   222677  SeuratProject   P9     B003129   \n",
       "21392      P9_B003132    749   228975  SeuratProject   P9     B003132   \n",
       "21393      P9_B003187   1805   636367  SeuratProject   P9     B003187   \n",
       "21394      P9_B003518   4232   566057  SeuratProject   P9     B003518   \n",
       "21395      P9_B003527   4539  2776906  SeuratProject   P9     B003527   \n",
       "21396      P9_B003528   1348  1044025  SeuratProject   P9     B003528   \n",
       "21397      P9_B003529   1769   857686  SeuratProject   P9     B003529   \n",
       "21398      P9_B003577   5361  1045655  SeuratProject   P9     B003577   \n",
       "21399      P9_B003643    568   165170  SeuratProject   P9     B003643   \n",
       "21400      P9_B003646   1879   540627  SeuratProject   P9     B003646   \n",
       "21401      P9_B003648   3024   577268  SeuratProject   P9     B003648   \n",
       "21402      P9_B003656    526    86190  SeuratProject   P9     B003656   \n",
       "21403      P9_B003657    542   101539  SeuratProject   P9     B003657   \n",
       "21404      P9_B003658   3765   227391  SeuratProject   P9     B003658   \n",
       "21405      P9_B003771   2347   765194  SeuratProject   P9     B003771   \n",
       "21406      P9_B003894   2269  1561743  SeuratProject   P9     B003894   \n",
       "21407      P9_B003920   2794  2060395  SeuratProject   P9     B003920   \n",
       "21408      P9_B003995   2260  1260744  SeuratProject   P9     B003995   \n",
       "\n",
       "              cell_id sample_name patient_id         DOB  ... res.0.1 res.0.3  \\\n",
       "0      A10_1001000329      LT_S07      TH103  1971-07-27  ...       0       1   \n",
       "1      A10_1001000407      LT_S21      TH185  1961-12-29  ...       2       4   \n",
       "2      A10_1001000408      LT_S21      TH185  1961-12-29  ...       6       7   \n",
       "3      A10_1001000410      LT_S21      TH185  1961-12-29  ...       1       0   \n",
       "4      A10_1001000412      LT_S21      TH185  1961-12-29  ...       6       7   \n",
       "5         A10_B000420      LT_S66      TH238  1949-08-25  ...       0      11   \n",
       "6         A10_B000422      LT_S66      TH238  1949-08-25  ...       0       1   \n",
       "7         A10_B000580      LT_S50      TH225  1940-06-25  ...       0       1   \n",
       "8         A10_B000863      LT_S47      TH220  1946-09-18  ...       2       4   \n",
       "9         A10_B001007      LT_S82      TH226  1954-12-26  ...      10      15   \n",
       "10        A10_B001008      LT_S82      TH226  1954-12-26  ...       0       1   \n",
       "11        A10_B001010      LT_S82      TH226  1954-12-26  ...       1       0   \n",
       "12        A10_B001470      LT_S74      TH248  1965-05-04  ...       2       3   \n",
       "13        A10_B001474      LT_S74      TH248  1965-05-04  ...       3       2   \n",
       "14        A10_B001545      LT_S79      TH179  1955-11-05  ...       2       4   \n",
       "15        A10_B001548      LT_S79      TH179  1955-11-05  ...       5       6   \n",
       "16        A10_B001551      LT_S80      TH179  1955-11-05  ...       0       1   \n",
       "17        A10_B001554      LT_S78  TH179_NAT  1955-11-05  ...       5       6   \n",
       "18        A10_B001556      LT_S80      TH179  1955-11-05  ...       4       5   \n",
       "19        A10_B001557      LT_S80      TH179  1955-11-05  ...       4       5   \n",
       "20        A10_B001558      LT_S78  TH179_NAT  1955-11-05  ...       5       6   \n",
       "21        A10_B001559      LT_S80      TH179  1955-11-05  ...       0      11   \n",
       "22        A10_B001608      LT_S28      TH185  1961-12-29  ...       1       0   \n",
       "23        A10_B002073      LT_S65  TH238_NAT  1949-08-25  ...       0       1   \n",
       "24        A10_B002074      LT_S65  TH238_NAT  1949-08-25  ...       0       1   \n",
       "25        A10_B002077      LT_S66      TH238  1949-08-25  ...       1       0   \n",
       "26        A10_B002078      LT_S66      TH238  1949-08-25  ...       1       0   \n",
       "27        A10_B002079      LT_S66      TH238  1949-08-25  ...      11      12   \n",
       "28        A10_B002572      LT_S81      TH266  1978-09-20  ...       5       6   \n",
       "29        A10_B003048      LT_S23      TH185  1961-12-29  ...       1       0   \n",
       "...               ...         ...        ...         ...  ...     ...     ...   \n",
       "21379      P9_B001608      LT_S28      TH185  1961-12-29  ...       1       0   \n",
       "21380      P9_B002077      LT_S66      TH238  1949-08-25  ...      11      12   \n",
       "21381      P9_B002079      LT_S66      TH238  1949-08-25  ...       0      11   \n",
       "21382      P9_B002573      LT_S81      TH266  1978-09-20  ...       3       2   \n",
       "21383      P9_B003048      LT_S23      TH185  1961-12-29  ...       1       0   \n",
       "21384      P9_B003067      LT_S34      TH205  1964-05-24  ...       3       2   \n",
       "21385      P9_B003070      LT_S34      TH205  1964-05-24  ...       0       1   \n",
       "21386      P9_B003071      LT_S34      TH205  1964-05-24  ...       5       6   \n",
       "21387      P9_B003105      LT_S72      TH222  1959-03-28  ...       9      10   \n",
       "21388      P9_B003115      LT_S55      TH218  1958-11-20  ...       4       5   \n",
       "21389      P9_B003125      LT_S57      TH226  1954-12-26  ...       9      10   \n",
       "21390      P9_B003126      LT_S57      TH226  1954-12-26  ...       0       1   \n",
       "21391      P9_B003129      LT_S57      TH226  1954-12-26  ...       9      10   \n",
       "21392      P9_B003132      LT_S57      TH226  1954-12-26  ...       9      10   \n",
       "21393      P9_B003187      LT_S57      TH226  1954-12-26  ...       5       6   \n",
       "21394      P9_B003518      LT_S63      TH171  1976-08-20  ...       8       9   \n",
       "21395      P9_B003527      LT_S63      TH171  1976-08-20  ...       8       9   \n",
       "21396      P9_B003528      LT_S58      TH179  1955-11-05  ...       3       2   \n",
       "21397      P9_B003529      LT_S63      TH171  1976-08-20  ...       4       5   \n",
       "21398      P9_B003577      LT_S58      TH179  1955-11-05  ...       2       3   \n",
       "21399      P9_B003643      LT_S49      TH223  1963-09-02  ...       9      10   \n",
       "21400      P9_B003646      LT_S52      TH226  1954-12-26  ...       0       1   \n",
       "21401      P9_B003648      LT_S53      TH067  1949-09-29  ...       0       1   \n",
       "21402      P9_B003656      LT_S51      TH227  1941-06-18  ...       2       3   \n",
       "21403      P9_B003657      LT_S49      TH223  1963-09-02  ...       9      10   \n",
       "21404      P9_B003658      LT_S50      TH225  1940-06-25  ...      10      17   \n",
       "21405      P9_B003771      LT_S71      TH236  1959-07-01  ...       4       5   \n",
       "21406      P9_B003894      LT_S71      TH236  1959-07-01  ...      11      12   \n",
       "21407      P9_B003920      LT_S71      TH236  1959-07-01  ...      11      12   \n",
       "21408      P9_B003995      LT_S71      TH236  1959-07-01  ...       5       6   \n",
       "\n",
       "      res.0.5 res.0.7 res.0.9   S.Score G2M.Score Phase main_seurat_cluster  \\\n",
       "0           0      15      15 -0.133188 -0.421943    G1                   0   \n",
       "1           3       4       2 -0.185666 -0.063043    G1                   3   \n",
       "2           8       8       9 -0.180289 -0.535851    G1                   8   \n",
       "3          10      10       8 -0.005365 -0.231767    G1                  10   \n",
       "4           8       8       9 -0.377089 -0.492793    G1                   8   \n",
       "5          13      13      13 -0.200227  0.030644   G2M                  13   \n",
       "6           0       0       0  0.141943  0.025751     S                   0   \n",
       "7           0       0       0  0.117926 -0.303089     S                   0   \n",
       "8           3       4       2 -0.124229 -0.203540    G1                   3   \n",
       "9          19      19      22  0.151958 -0.111904     S                  19   \n",
       "10          0       0       0 -0.257014 -0.217378    G1                   0   \n",
       "11          1       1       1  0.004001  0.422330   G2M                   1   \n",
       "12          6       7      19 -0.126570 -0.392595    G1                   6   \n",
       "13         14      16      14  0.037971 -0.312922     S                  14   \n",
       "14          3       4       2 -0.246892 -0.270221    G1                   3   \n",
       "15          5       6       5  0.077639 -0.227272     S                   5   \n",
       "16          0       3      15 -0.214978 -0.307657    G1                   0   \n",
       "17          5       6       5 -0.026382 -0.139074    G1                   5   \n",
       "18          4       5       3  0.042820 -0.131778     S                   4   \n",
       "19          4       5       3  0.026602 -0.399065     S                   4   \n",
       "20          5       6       5 -0.077484 -0.260795    G1                   5   \n",
       "21         13      13      13 -0.143188 -0.072381    G1                  13   \n",
       "22         10      10       8  0.159249  0.144014     S                  10   \n",
       "23          0       3      15 -0.064271 -0.201847    G1                   0   \n",
       "24          0       0       0 -0.321349 -0.193131    G1                   0   \n",
       "25          1       9       7  0.020655  0.030858   G2M                   1   \n",
       "26          1       1       1 -0.263965 -0.162814    G1                   1   \n",
       "27         16      17      16 -0.080785 -0.078821    G1                  16   \n",
       "28          5       6       5 -0.371432 -0.427129    G1                   5   \n",
       "29         10      10       8  0.140251 -0.038638     S                  10   \n",
       "...       ...     ...     ...       ...       ...   ...                 ...   \n",
       "21379      15       9       7  0.095965 -0.234211     S                  15   \n",
       "21380      16      17      16 -0.198534 -0.031337    G1                  16   \n",
       "21381      13      13      13 -0.220345 -0.245248    G1                  13   \n",
       "21382      14      16      14 -0.026880 -0.063479    G1                  14   \n",
       "21383      10      10       8  0.109896 -0.146143     S                  10   \n",
       "21384       2       2       4  0.048504 -0.108903     S                   2   \n",
       "21385       0       3      15 -0.160681  0.014570   G2M                   0   \n",
       "21386       5       6       5 -0.073801 -0.246584    G1                   5   \n",
       "21387      12      12      12 -0.089607 -0.093771    G1                  12   \n",
       "21388       4       5       3 -0.058187 -0.002288    G1                   4   \n",
       "21389      12      12      12 -0.196344  0.007299   G2M                  12   \n",
       "21390       7       3       6 -0.267222 -0.069372    G1                   7   \n",
       "21391      12      12      12 -0.039355 -0.042824    G1                  12   \n",
       "21392      12      12      12  0.043213 -0.125724     S                  12   \n",
       "21393       5       6       5 -0.224999  0.011019   G2M                   5   \n",
       "21394      11      11      11 -0.494101 -0.249648    G1                  11   \n",
       "21395      11      11      11 -0.121439 -0.245387    G1                  11   \n",
       "21396      14      16      14 -0.060097 -0.235275    G1                  14   \n",
       "21397       4       5       3 -0.070309 -0.146753    G1                   4   \n",
       "21398       6       7      18  0.210836 -0.109527     S                   6   \n",
       "21399      12      12      12  0.029372  0.138250   G2M                  12   \n",
       "21400       0       0       0  0.154259  0.015758     S                   0   \n",
       "21401       0       0       0 -0.212149 -0.130153    G1                   0   \n",
       "21402       6       7      18 -0.092567  0.040308   G2M                   6   \n",
       "21403      12      12      12 -0.094790 -0.147624    G1                  12   \n",
       "21404      22      23      25  0.027822 -0.213943     S                  22   \n",
       "21405       4       5       3 -0.210142 -0.260048    G1                   4   \n",
       "21406      16      17      16 -0.110873 -0.176722    G1                  16   \n",
       "21407      16      17      16 -0.182744 -0.259483    G1                  16   \n",
       "21408       5       6       5  0.113005  0.044369     S                   5   \n",
       "\n",
       "      immune_annotation  \n",
       "0                immune  \n",
       "1            non-immune  \n",
       "2            non-immune  \n",
       "3                immune  \n",
       "4            non-immune  \n",
       "5                immune  \n",
       "6                immune  \n",
       "7                immune  \n",
       "8            non-immune  \n",
       "9            non-immune  \n",
       "10               immune  \n",
       "11               immune  \n",
       "12           non-immune  \n",
       "13           non-immune  \n",
       "14           non-immune  \n",
       "15           non-immune  \n",
       "16               immune  \n",
       "17           non-immune  \n",
       "18               immune  \n",
       "19               immune  \n",
       "20           non-immune  \n",
       "21               immune  \n",
       "22               immune  \n",
       "23               immune  \n",
       "24               immune  \n",
       "25               immune  \n",
       "26               immune  \n",
       "27               immune  \n",
       "28           non-immune  \n",
       "29               immune  \n",
       "...                 ...  \n",
       "21379            immune  \n",
       "21380            immune  \n",
       "21381            immune  \n",
       "21382        non-immune  \n",
       "21383            immune  \n",
       "21384        non-immune  \n",
       "21385            immune  \n",
       "21386        non-immune  \n",
       "21387            immune  \n",
       "21388            immune  \n",
       "21389            immune  \n",
       "21390            immune  \n",
       "21391            immune  \n",
       "21392            immune  \n",
       "21393        non-immune  \n",
       "21394        non-immune  \n",
       "21395        non-immune  \n",
       "21396        non-immune  \n",
       "21397            immune  \n",
       "21398        non-immune  \n",
       "21399            immune  \n",
       "21400            immune  \n",
       "21401            immune  \n",
       "21402        non-immune  \n",
       "21403            immune  \n",
       "21404        non-immune  \n",
       "21405            immune  \n",
       "21406            immune  \n",
       "21407            immune  \n",
       "21408        non-immune  \n",
       "\n",
       "[21409 rows x 58 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the per-cell metadata table from CSV and display it.\n",
    "meta_x_cell = pd.read_csv(filepath_or_buffer='../metadata_input/metadata_all_cells.csv')\n",
    "meta_x_cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "samples_we_care_about = ['LT_S72', 'LT_S47', 'LT_S23', 'LT_S21', 'LT_S78', 'LT_S80', 'LT_S79', \n",
    "                         'LT_S58', 'LT_S63', 'LT_S48', 'LT_S16', 'LT_S08', 'LT_S07', 'LT_S05', \n",
    "                         'LT_S53', 'LT_S57', 'LT_S71', 'LT_S41', 'LT_S13', 'LT_S11', 'LT_S42', \n",
    "                         'LT_S01', 'LT_S55', 'LT_S52', 'LT_S45', 'LT_S75', 'LT_S74', 'LT_S69', \n",
    "                         'LT_S65', 'LT_S66', 'LT_S56', 'LT_S51', 'LT_S50', 'LT_S49', 'LT_S43', 'LT_S34']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the patient_id of every row of meta_x_cell whose sample_name is in\n",
    "# samples_we_care_about. The result has one entry per matching row, in row\n",
    "# order, so it contains duplicates.\n",
    "#\n",
    "# Boolean-mask selection replaces a per-row iterrows() loop, which is slow on\n",
    "# a frame with tens of thousands of rows and left its loop variables behind\n",
    "# in the notebook namespace.\n",
    "is_sample_of_interest = meta_x_cell['sample_name'].isin(samples_we_care_about)\n",
    "patients_we_care_about = meta_x_cell.loc[is_sample_of_interest, 'patient_id'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH067',\n",
       " 'TH103',\n",
       " 'TH146',\n",
       " 'TH155',\n",
       " 'TH158',\n",
       " 'TH169',\n",
       " 'TH171',\n",
       " 'TH179',\n",
       " 'TH179_NAT',\n",
       " 'TH185',\n",
       " 'TH205',\n",
       " 'TH210',\n",
       " 'TH217',\n",
       " 'TH218',\n",
       " 'TH220',\n",
       " 'TH222',\n",
       " 'TH223',\n",
       " 'TH225',\n",
       " 'TH226',\n",
       " 'TH227',\n",
       " 'TH231',\n",
       " 'TH236',\n",
       " 'TH238',\n",
       " 'TH238_NAT',\n",
       " 'TH248',\n",
       " 'TH266'}"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# De-duplicate the per-row patient ids into a set of unique patients\n",
    "patients_we_care_about_u = {*patients_we_care_about}\n",
    "patients_we_care_about_u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH067',\n",
       " 'TH146',\n",
       " 'TH155',\n",
       " 'TH158',\n",
       " 'TH169',\n",
       " 'TH171',\n",
       " 'TH179',\n",
       " 'TH205',\n",
       " 'TH210',\n",
       " 'TH217',\n",
       " 'TH218',\n",
       " 'TH220',\n",
       " 'TH222',\n",
       " 'TH226',\n",
       " 'TH227',\n",
       " 'TH231',\n",
       " 'TH236',\n",
       " 'TH238',\n",
       " 'TH248',\n",
       " 'TH266'}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Patients of interest that also appear in the WES patient set\n",
    "relevant_patients_we_have_WES_for = patients_w_WES_u & patients_we_care_about_u\n",
    "relevant_patients_we_have_WES_for"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH103', 'TH179_NAT', 'TH185', 'TH223', 'TH225', 'TH238_NAT'}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Patient ids of interest that have no entry in patients_w_WES_u.\n",
    "# NOTE(review): the *_NAT ids look like extra labels for TH179 / TH238, which\n",
    "# do appear in the WES set -- presumably why only 4 patients are counted as\n",
    "# truly missing; confirm.\n",
    "patients_we_care_about_u - patients_w_WES_u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20\n"
     ]
    }
   ],
   "source": [
    "# How many of the patients of interest have WES data\n",
    "n_relevant_patients_with_WES = len(relevant_patients_we_have_WES_for)\n",
    "print(n_relevant_patients_with_WES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [],
   "source": [
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#///    take a look at WES mutations data, generated with cerebra    //////////\n",
    "#//////////////////////////////////////////////////////////////////////////////"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AKT1_E242E</th>\n",
       "      <th>ALK_A1200A</th>\n",
       "      <th>ALK_K1491R</th>\n",
       "      <th>ALK_R1436C</th>\n",
       "      <th>ALK_T1012T</th>\n",
       "      <th>BRAF_G643G</th>\n",
       "      <th>BRAF_V600E</th>\n",
       "      <th>BRCA2_K1132K</th>\n",
       "      <th>BRCA2_N289H</th>\n",
       "      <th>BRCA2_N372H</th>\n",
       "      <th>...</th>\n",
       "      <th>SPTA1_A2365A</th>\n",
       "      <th>SPTA1_K1693Q</th>\n",
       "      <th>SPTA1_L1858V</th>\n",
       "      <th>SPTA1_L958L</th>\n",
       "      <th>SPTA1_P1986P</th>\n",
       "      <th>TET2_I1762V</th>\n",
       "      <th>TP53_P72R</th>\n",
       "      <th>TP53_R273C</th>\n",
       "      <th>TP53_Y236C</th>\n",
       "      <th>TPR_S960N</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH041_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>88:178</td>\n",
       "      <td>...</td>\n",
       "      <td>77:77</td>\n",
       "      <td>130:130</td>\n",
       "      <td>0:0</td>\n",
       "      <td>75:75</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>63:127</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>70:161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E3_WB3</th>\n",
       "      <td>315:707</td>\n",
       "      <td>0:0</td>\n",
       "      <td>298:673</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>87:147</td>\n",
       "      <td>0:0</td>\n",
       "      <td>60:121</td>\n",
       "      <td>...</td>\n",
       "      <td>169:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>63:122</td>\n",
       "      <td>0:0</td>\n",
       "      <td>109:223</td>\n",
       "      <td>522:523</td>\n",
       "      <td>640:640</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E3_WB4</th>\n",
       "      <td>115:196</td>\n",
       "      <td>0:0</td>\n",
       "      <td>79:185</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>85:172</td>\n",
       "      <td>0:0</td>\n",
       "      <td>80:142</td>\n",
       "      <td>...</td>\n",
       "      <td>60:106</td>\n",
       "      <td>0:0</td>\n",
       "      <td>33:61</td>\n",
       "      <td>0:0</td>\n",
       "      <td>33:69</td>\n",
       "      <td>118:118</td>\n",
       "      <td>137:137</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E4</th>\n",
       "      <td>172:375</td>\n",
       "      <td>0:0</td>\n",
       "      <td>212:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>147:253</td>\n",
       "      <td>0:0</td>\n",
       "      <td>126:260</td>\n",
       "      <td>...</td>\n",
       "      <td>132:223</td>\n",
       "      <td>0:0</td>\n",
       "      <td>91:201</td>\n",
       "      <td>0:0</td>\n",
       "      <td>115:235</td>\n",
       "      <td>286:286</td>\n",
       "      <td>407:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E7</th>\n",
       "      <td>340:706</td>\n",
       "      <td>0:0</td>\n",
       "      <td>374:611</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>238:353</td>\n",
       "      <td>0:0</td>\n",
       "      <td>116:344</td>\n",
       "      <td>...</td>\n",
       "      <td>263:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>176:292</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:362</td>\n",
       "      <td>512:512</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH107_E2_WB2</th>\n",
       "      <td>74:136</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>95:179</td>\n",
       "      <td>78:151</td>\n",
       "      <td>74:132</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>89:179</td>\n",
       "      <td>...</td>\n",
       "      <td>52:103</td>\n",
       "      <td>0:0</td>\n",
       "      <td>31:69</td>\n",
       "      <td>65:109</td>\n",
       "      <td>29:57</td>\n",
       "      <td>80:142</td>\n",
       "      <td>73:179</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH116_E2</th>\n",
       "      <td>417:417</td>\n",
       "      <td>0:0</td>\n",
       "      <td>615:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>235:235</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>128:390</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:218</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:245</td>\n",
       "      <td>0:0</td>\n",
       "      <td>448:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH116_E4_WB1</th>\n",
       "      <td>189:189</td>\n",
       "      <td>0:0</td>\n",
       "      <td>200:200</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>109:110</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>71:148</td>\n",
       "      <td>0:0</td>\n",
       "      <td>38:83</td>\n",
       "      <td>0:0</td>\n",
       "      <td>46:98</td>\n",
       "      <td>0:0</td>\n",
       "      <td>136:136</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH146_E7_WB1</th>\n",
       "      <td>68:150</td>\n",
       "      <td>0:0</td>\n",
       "      <td>84:165</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>68:117</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>32:65</td>\n",
       "      <td>0:0</td>\n",
       "      <td>33:60</td>\n",
       "      <td>0:0</td>\n",
       "      <td>43:92</td>\n",
       "      <td>0:0</td>\n",
       "      <td>52:110</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH150_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:101</td>\n",
       "      <td>0:0</td>\n",
       "      <td>104:104</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>81:150</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH153_E6_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>433:433</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>216:362</td>\n",
       "      <td>...</td>\n",
       "      <td>112:234</td>\n",
       "      <td>120:262</td>\n",
       "      <td>0:0</td>\n",
       "      <td>66:159</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>173:413</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155_E3_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>51:124</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>53:99</td>\n",
       "      <td>0:0</td>\n",
       "      <td>53:111</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>46:75</td>\n",
       "      <td>66:125</td>\n",
       "      <td>0:0</td>\n",
       "      <td>43:90</td>\n",
       "      <td>0:0</td>\n",
       "      <td>40:94</td>\n",
       "      <td>62:124</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155_E5</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:306</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>210:259</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:222</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>140:276</td>\n",
       "      <td>152:372</td>\n",
       "      <td>0:0</td>\n",
       "      <td>108:256</td>\n",
       "      <td>0:0</td>\n",
       "      <td>45:261</td>\n",
       "      <td>37:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:71</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH156_E2_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>1165:1165</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>54:114</td>\n",
       "      <td>...</td>\n",
       "      <td>574:574</td>\n",
       "      <td>0:0</td>\n",
       "      <td>219:219</td>\n",
       "      <td>0:0</td>\n",
       "      <td>477:478</td>\n",
       "      <td>0:0</td>\n",
       "      <td>1038:1038</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH158_E2_WB3</th>\n",
       "      <td>72:148</td>\n",
       "      <td>0:0</td>\n",
       "      <td>75:170</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>60:124</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>49:92</td>\n",
       "      <td>43:86</td>\n",
       "      <td>0:0</td>\n",
       "      <td>24:63</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>92:162</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E2</th>\n",
       "      <td>364:364</td>\n",
       "      <td>0:0</td>\n",
       "      <td>195:404</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>116:246</td>\n",
       "      <td>0:0</td>\n",
       "      <td>174:329</td>\n",
       "      <td>129:258</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>99:196</td>\n",
       "      <td>256:256</td>\n",
       "      <td>0:0</td>\n",
       "      <td>170:170</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>411:411</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E4</th>\n",
       "      <td>465:465</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:436</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>117:271</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:346</td>\n",
       "      <td>159:344</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>130:211</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:235</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E6_WB1</th>\n",
       "      <td>169:169</td>\n",
       "      <td>0:0</td>\n",
       "      <td>67:157</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>75:121</td>\n",
       "      <td>0:0</td>\n",
       "      <td>60:149</td>\n",
       "      <td>67:129</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>51:98</td>\n",
       "      <td>145:145</td>\n",
       "      <td>0:0</td>\n",
       "      <td>46:46</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>144:144</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171_E1_WB1</th>\n",
       "      <td>95:180</td>\n",
       "      <td>88:144</td>\n",
       "      <td>200:201</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>79:128</td>\n",
       "      <td>0:0</td>\n",
       "      <td>95:190</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>45:109</td>\n",
       "      <td>62:105</td>\n",
       "      <td>0:0</td>\n",
       "      <td>58:103</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>62:162</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171_E3</th>\n",
       "      <td>211:490</td>\n",
       "      <td>183:413</td>\n",
       "      <td>470:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>167:313</td>\n",
       "      <td>0:0</td>\n",
       "      <td>172:361</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>171:326</td>\n",
       "      <td>166:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>109:209</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>207:402</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH172_E2_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:100</td>\n",
       "      <td>0:0</td>\n",
       "      <td>69:118</td>\n",
       "      <td>0:0</td>\n",
       "      <td>90:155</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>43:98</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>60:110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH172_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>178:310</td>\n",
       "      <td>0:0</td>\n",
       "      <td>178:347</td>\n",
       "      <td>0:0</td>\n",
       "      <td>173:373</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:371</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>159:327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH174_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>240:496</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>98:219</td>\n",
       "      <td>0:0</td>\n",
       "      <td>258:509</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH178_E2_WB1</th>\n",
       "      <td>27:51</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>26:63</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>45:45</td>\n",
       "      <td>21:48</td>\n",
       "      <td>16:29</td>\n",
       "      <td>6:19</td>\n",
       "      <td>14:29</td>\n",
       "      <td>0:0</td>\n",
       "      <td>93:93</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>29:129</td>\n",
       "      <td>183:361</td>\n",
       "      <td>0:0</td>\n",
       "      <td>165:336</td>\n",
       "      <td>...</td>\n",
       "      <td>104:240</td>\n",
       "      <td>0:0</td>\n",
       "      <td>78:171</td>\n",
       "      <td>0:0</td>\n",
       "      <td>94:193</td>\n",
       "      <td>164:337</td>\n",
       "      <td>159:328</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>97:170</td>\n",
       "      <td>0:0</td>\n",
       "      <td>110:197</td>\n",
       "      <td>...</td>\n",
       "      <td>55:94</td>\n",
       "      <td>0:0</td>\n",
       "      <td>45:93</td>\n",
       "      <td>0:0</td>\n",
       "      <td>52:90</td>\n",
       "      <td>73:148</td>\n",
       "      <td>68:153</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH183_E2_WB1</th>\n",
       "      <td>106:199</td>\n",
       "      <td>0:0</td>\n",
       "      <td>236:236</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>86:167</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>89:197</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>184:184</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH187_E2_WB1</th>\n",
       "      <td>116:206</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>93:152</td>\n",
       "      <td>79:158</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>56:122</td>\n",
       "      <td>62:125</td>\n",
       "      <td>0:0</td>\n",
       "      <td>62:103</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH187_E3</th>\n",
       "      <td>294:451</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>197:409</td>\n",
       "      <td>164:260</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>113:216</td>\n",
       "      <td>147:297</td>\n",
       "      <td>0:0</td>\n",
       "      <td>172:313</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>68:355</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH199_E3_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>163:352</td>\n",
       "      <td>0:0</td>\n",
       "      <td>201:397</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>407:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>178:397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205_E1_WB1</th>\n",
       "      <td>217:217</td>\n",
       "      <td>0:0</td>\n",
       "      <td>236:236</td>\n",
       "      <td>0:0</td>\n",
       "      <td>126:231</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>78:177</td>\n",
       "      <td>0:0</td>\n",
       "      <td>118:213</td>\n",
       "      <td>...</td>\n",
       "      <td>97:170</td>\n",
       "      <td>87:190</td>\n",
       "      <td>0:0</td>\n",
       "      <td>54:110</td>\n",
       "      <td>0:0</td>\n",
       "      <td>97:192</td>\n",
       "      <td>83:210</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205_E2</th>\n",
       "      <td>504:504</td>\n",
       "      <td>0:0</td>\n",
       "      <td>444:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>196:396</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>165:340</td>\n",
       "      <td>0:0</td>\n",
       "      <td>209:419</td>\n",
       "      <td>...</td>\n",
       "      <td>99:273</td>\n",
       "      <td>136:318</td>\n",
       "      <td>0:0</td>\n",
       "      <td>134:296</td>\n",
       "      <td>0:0</td>\n",
       "      <td>202:418</td>\n",
       "      <td>177:401</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E2_WB1</th>\n",
       "      <td>161:161</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:186</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>48:102</td>\n",
       "      <td>54:122</td>\n",
       "      <td>0:0</td>\n",
       "      <td>42:76</td>\n",
       "      <td>0:0</td>\n",
       "      <td>150:150</td>\n",
       "      <td>83:182</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E3</th>\n",
       "      <td>384:384</td>\n",
       "      <td>0:0</td>\n",
       "      <td>481:481</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>156:315</td>\n",
       "      <td>170:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>111:214</td>\n",
       "      <td>0:0</td>\n",
       "      <td>375:377</td>\n",
       "      <td>154:400</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E4</th>\n",
       "      <td>612:612</td>\n",
       "      <td>0:0</td>\n",
       "      <td>771:771</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>185:402</td>\n",
       "      <td>246:497</td>\n",
       "      <td>0:0</td>\n",
       "      <td>157:328</td>\n",
       "      <td>0:0</td>\n",
       "      <td>518:518</td>\n",
       "      <td>290:686</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>242:519</td>\n",
       "      <td>0:0</td>\n",
       "      <td>151:351</td>\n",
       "      <td>267:267</td>\n",
       "      <td>0:0</td>\n",
       "      <td>175:311</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210_E2_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>106:215</td>\n",
       "      <td>0:0</td>\n",
       "      <td>84:149</td>\n",
       "      <td>170:170</td>\n",
       "      <td>0:0</td>\n",
       "      <td>81:183</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH217_E1_WB1</th>\n",
       "      <td>588:588</td>\n",
       "      <td>0:0</td>\n",
       "      <td>306:611</td>\n",
       "      <td>0:0</td>\n",
       "      <td>227:440</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>205:387</td>\n",
       "      <td>0:0</td>\n",
       "      <td>243:426</td>\n",
       "      <td>...</td>\n",
       "      <td>128:284</td>\n",
       "      <td>0:0</td>\n",
       "      <td>91:198</td>\n",
       "      <td>129:270</td>\n",
       "      <td>130:263</td>\n",
       "      <td>0:0</td>\n",
       "      <td>285:567</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH218_E3_WB1</th>\n",
       "      <td>111:218</td>\n",
       "      <td>0:0</td>\n",
       "      <td>223:223</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>118:213</td>\n",
       "      <td>0:0</td>\n",
       "      <td>101:224</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>132:132</td>\n",
       "      <td>88:191</td>\n",
       "      <td>50:109</td>\n",
       "      <td>61:123</td>\n",
       "      <td>61:113</td>\n",
       "      <td>97:217</td>\n",
       "      <td>94:202</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>83:184</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>86:86</td>\n",
       "      <td>0:0</td>\n",
       "      <td>60:60</td>\n",
       "      <td>0:0</td>\n",
       "      <td>80:80</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>62:130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220_E2</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>548:802</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>289:289</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:234</td>\n",
       "      <td>0:0</td>\n",
       "      <td>224:224</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>125:365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH222_E1_CBC1</th>\n",
       "      <td>173:347</td>\n",
       "      <td>0:0</td>\n",
       "      <td>353:353</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>323:323</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>106:243</td>\n",
       "      <td>119:259</td>\n",
       "      <td>0:0</td>\n",
       "      <td>107:211</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>372:372</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>150:245</td>\n",
       "      <td>0:0</td>\n",
       "      <td>146:278</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>163:322</td>\n",
       "      <td>390:390</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>249:249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226_E3_WB4</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>84:150</td>\n",
       "      <td>0:0</td>\n",
       "      <td>78:180</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>65:149</td>\n",
       "      <td>181:181</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>144:144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH227_E2_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>114:232</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>136:258</td>\n",
       "      <td>...</td>\n",
       "      <td>61:180</td>\n",
       "      <td>120:226</td>\n",
       "      <td>0:0</td>\n",
       "      <td>72:171</td>\n",
       "      <td>0:0</td>\n",
       "      <td>150:289</td>\n",
       "      <td>188:338</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>201:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>171:298</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>227:388</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>167:347</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231_E4_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>84:187</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>93:141</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>104:168</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>83:149</td>\n",
       "      <td>169:169</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH236_E4_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>134:286</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>468:468</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>677:677</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>286:568</td>\n",
       "      <td>...</td>\n",
       "      <td>173:379</td>\n",
       "      <td>195:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>290:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238_E1_WB1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>270:270</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>122:238</td>\n",
       "      <td>...</td>\n",
       "      <td>95:169</td>\n",
       "      <td>111:180</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>124:239</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH248_E3_WB1</th>\n",
       "      <td>291:619</td>\n",
       "      <td>0:0</td>\n",
       "      <td>302:660</td>\n",
       "      <td>0:0</td>\n",
       "      <td>240:482</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>402:404</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>317:317</td>\n",
       "      <td>0:0</td>\n",
       "      <td>204:204</td>\n",
       "      <td>0:0</td>\n",
       "      <td>262:262</td>\n",
       "      <td>0:0</td>\n",
       "      <td>564:564</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH266_E1_WB1</th>\n",
       "      <td>175:374</td>\n",
       "      <td>0:0</td>\n",
       "      <td>215:409</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>153:300</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>179:377</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52 rows × 64 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              AKT1_E242E ALK_A1200A ALK_K1491R ALK_R1436C ALK_T1012T  \\\n",
       "sample                                                                 \n",
       "TH041_E1_WB1         0:0        0:0        0:0        0:0        0:0   \n",
       "TH067_E3_WB3     315:707        0:0    298:673        0:0        0:0   \n",
       "TH067_E3_WB4     115:196        0:0     79:185        0:0        0:0   \n",
       "TH067_E4         172:375        0:0    212:448        0:0        0:0   \n",
       "TH067_E7         340:706        0:0    374:611        0:0        0:0   \n",
       "TH107_E2_WB2      74:136        0:0        0:0     95:179     78:151   \n",
       "TH116_E2         417:417        0:0    615:615        0:0        0:0   \n",
       "TH116_E4_WB1     189:189        0:0    200:200        0:0        0:0   \n",
       "TH146_E7_WB1      68:150        0:0     84:165        0:0        0:0   \n",
       "TH150_E1_WB1         0:0        0:0        0:0        0:0        0:0   \n",
       "TH153_E6_WB1         0:0        0:0    433:433        0:0        0:0   \n",
       "TH155_E3_WB1         0:0        0:0     51:124        0:0        0:0   \n",
       "TH155_E5             0:0        0:0     76:306        0:0        0:0   \n",
       "TH156_E2_WB1         0:0        0:0  1165:1165        0:0        0:0   \n",
       "TH158_E2_WB3      72:148        0:0     75:170        0:0        0:0   \n",
       "TH169_E2         364:364        0:0    195:404        0:0        0:0   \n",
       "TH169_E4         465:465        0:0    234:436        0:0        0:0   \n",
       "TH169_E6_WB1     169:169        0:0     67:157        0:0        0:0   \n",
       "TH171_E1_WB1      95:180     88:144    200:201        0:0        0:0   \n",
       "TH171_E3         211:490    183:413    470:470        0:0        0:0   \n",
       "TH172_E2_WB1         0:0        0:0        0:0        0:0        0:0   \n",
       "TH172_E3             0:0        0:0        0:0        0:0        0:0   \n",
       "TH174_E1_WB1         0:0        0:0    240:496        0:0        0:0   \n",
       "TH178_E2_WB1       27:51        0:0        0:0        0:0        0:0   \n",
       "TH179_E1             0:0        0:0        0:0        0:0        0:0   \n",
       "TH179_E1_WB1         0:0        0:0        0:0        0:0        0:0   \n",
       "TH183_E2_WB1     106:199        0:0    236:236        0:0        0:0   \n",
       "TH187_E2_WB1     116:206        0:0        0:0        0:0        0:0   \n",
       "TH187_E3         294:451        0:0        0:0        0:0        0:0   \n",
       "TH199_E3_WB1         0:0        0:0        0:0        0:0        0:0   \n",
       "TH205_E1_WB1     217:217        0:0    236:236        0:0    126:231   \n",
       "TH205_E2         504:504        0:0    444:444        0:0    196:396   \n",
       "TH208_E2_WB1     161:161        0:0    186:186        0:0        0:0   \n",
       "TH208_E3         384:384        0:0    481:481        0:0        0:0   \n",
       "TH208_E4         612:612        0:0    771:771        0:0        0:0   \n",
       "TH210_E1             0:0        0:0    242:519        0:0    151:351   \n",
       "TH210_E2_WB1         0:0        0:0    106:215        0:0     84:149   \n",
       "TH217_E1_WB1     588:588        0:0    306:611        0:0    227:440   \n",
       "TH218_E3_WB1     111:218        0:0    223:223        0:0        0:0   \n",
       "TH220_E1_WB1         0:0        0:0     83:184        0:0        0:0   \n",
       "TH220_E2             0:0        0:0    548:802        0:0        0:0   \n",
       "TH222_E1_CBC1    173:347        0:0    353:353        0:0        0:0   \n",
       "TH226_E3             0:0        0:0        0:0        0:0        0:0   \n",
       "TH226_E3_WB4         0:0        0:0        0:0        0:0        0:0   \n",
       "TH227_E2_WB1         0:0        0:0        0:0        0:0    114:232   \n",
       "TH231_E1             0:0        0:0    201:444        0:0        0:0   \n",
       "TH231_E4_WB1         0:0        0:0     84:187        0:0        0:0   \n",
       "TH236_E4_WB1         0:0        0:0        0:0        0:0    134:286   \n",
       "TH238_E1             0:0        0:0    677:677        0:0        0:0   \n",
       "TH238_E1_WB1         0:0        0:0    270:270        0:0        0:0   \n",
       "TH248_E3_WB1     291:619        0:0    302:660        0:0    240:482   \n",
       "TH266_E1_WB1     175:374        0:0    215:409        0:0        0:0   \n",
       "\n",
       "              BRAF_G643G BRAF_V600E BRCA2_K1132K BRCA2_N289H BRCA2_N372H  ...  \\\n",
       "sample                                                                    ...   \n",
       "TH041_E1_WB1         0:0        0:0          0:0         0:0      88:178  ...   \n",
       "TH067_E3_WB3         0:0        0:0       87:147         0:0      60:121  ...   \n",
       "TH067_E3_WB4         0:0        0:0       85:172         0:0      80:142  ...   \n",
       "TH067_E4             0:0        0:0      147:253         0:0     126:260  ...   \n",
       "TH067_E7             0:0        0:0      238:353         0:0     116:344  ...   \n",
       "TH107_E2_WB2      74:132        0:0          0:0         0:0      89:179  ...   \n",
       "TH116_E2             0:0        0:0          0:0     235:235         0:0  ...   \n",
       "TH116_E4_WB1         0:0        0:0          0:0     109:110         0:0  ...   \n",
       "TH146_E7_WB1         0:0        0:0       68:117         0:0         0:0  ...   \n",
       "TH150_E1_WB1      55:101        0:0      104:104         0:0         0:0  ...   \n",
       "TH153_E6_WB1         0:0        0:0          0:0         0:0     216:362  ...   \n",
       "TH155_E3_WB1       53:99        0:0       53:111         0:0         0:0  ...   \n",
       "TH155_E5         210:259        0:0       55:222         0:0         0:0  ...   \n",
       "TH156_E2_WB1         0:0        0:0          0:0         0:0      54:114  ...   \n",
       "TH158_E2_WB3         0:0        0:0       60:124         0:0         0:0  ...   \n",
       "TH169_E2         116:246        0:0      174:329     129:258         0:0  ...   \n",
       "TH169_E4         117:271        0:0      186:346     159:344         0:0  ...   \n",
       "TH169_E6_WB1      75:121        0:0       60:149      67:129         0:0  ...   \n",
       "TH171_E1_WB1      79:128        0:0       95:190         0:0         0:0  ...   \n",
       "TH171_E3         167:313        0:0      172:361         0:0         0:0  ...   \n",
       "TH172_E2_WB1      55:100        0:0       69:118         0:0      90:155  ...   \n",
       "TH172_E3         178:310        0:0      178:347         0:0     173:373  ...   \n",
       "TH174_E1_WB1         0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH178_E2_WB1       26:63        0:0          0:0         0:0         0:0  ...   \n",
       "TH179_E1             0:0     29:129      183:361         0:0     165:336  ...   \n",
       "TH179_E1_WB1         0:0        0:0       97:170         0:0     110:197  ...   \n",
       "TH183_E2_WB1      86:167        0:0          0:0         0:0      89:197  ...   \n",
       "TH187_E2_WB1         0:0        0:0       93:152      79:158         0:0  ...   \n",
       "TH187_E3             0:0        0:0      197:409     164:260         0:0  ...   \n",
       "TH199_E3_WB1         0:0        0:0      163:352         0:0     201:397  ...   \n",
       "TH205_E1_WB1         0:0        0:0       78:177         0:0     118:213  ...   \n",
       "TH205_E2             0:0        0:0      165:340         0:0     209:419  ...   \n",
       "TH208_E2_WB1         0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH208_E3             0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH208_E4             0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH210_E1         267:267        0:0      175:311         0:0         0:0  ...   \n",
       "TH210_E2_WB1     170:170        0:0       81:183         0:0         0:0  ...   \n",
       "TH217_E1_WB1         0:0        0:0      205:387         0:0     243:426  ...   \n",
       "TH218_E3_WB1     118:213        0:0      101:224         0:0         0:0  ...   \n",
       "TH220_E1_WB1         0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH220_E2             0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH222_E1_CBC1        0:0        0:0      323:323         0:0         0:0  ...   \n",
       "TH226_E3         150:245        0:0      146:278         0:0         0:0  ...   \n",
       "TH226_E3_WB4      84:150        0:0       78:180         0:0         0:0  ...   \n",
       "TH227_E2_WB1         0:0        0:0          0:0         0:0     136:258  ...   \n",
       "TH231_E1         171:298        0:0          0:0         0:0     227:388  ...   \n",
       "TH231_E4_WB1      93:141        0:0          0:0         0:0     104:168  ...   \n",
       "TH236_E4_WB1         0:0        0:0          0:0         0:0         0:0  ...   \n",
       "TH238_E1             0:0        0:0          0:0         0:0     286:568  ...   \n",
       "TH238_E1_WB1         0:0        0:0          0:0         0:0     122:238  ...   \n",
       "TH248_E3_WB1         0:0        0:0      402:404         0:0         0:0  ...   \n",
       "TH266_E1_WB1         0:0        0:0      153:300         0:0         0:0  ...   \n",
       "\n",
       "              SPTA1_A2365A SPTA1_K1693Q SPTA1_L1858V SPTA1_L958L SPTA1_P1986P  \\\n",
       "sample                                                                          \n",
       "TH041_E1_WB1         77:77      130:130          0:0       75:75          0:0   \n",
       "TH067_E3_WB3       169:337          0:0       63:122         0:0      109:223   \n",
       "TH067_E3_WB4        60:106          0:0        33:61         0:0        33:69   \n",
       "TH067_E4           132:223          0:0       91:201         0:0      115:235   \n",
       "TH067_E7           263:407          0:0      176:292         0:0      234:362   \n",
       "TH107_E2_WB2        52:103          0:0        31:69      65:109        29:57   \n",
       "TH116_E2           128:390          0:0       76:218         0:0       76:245   \n",
       "TH116_E4_WB1        71:148          0:0        38:83         0:0        46:98   \n",
       "TH146_E7_WB1         32:65          0:0        33:60         0:0        43:92   \n",
       "TH150_E1_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH153_E6_WB1       112:234      120:262          0:0      66:159          0:0   \n",
       "TH155_E3_WB1         46:75       66:125          0:0       43:90          0:0   \n",
       "TH155_E5           140:276      152:372          0:0     108:256          0:0   \n",
       "TH156_E2_WB1       574:574          0:0      219:219         0:0      477:478   \n",
       "TH158_E2_WB3         49:92        43:86          0:0       24:63          0:0   \n",
       "TH169_E2            99:196      256:256          0:0     170:170          0:0   \n",
       "TH169_E4           130:211      330:330          0:0     234:235          0:0   \n",
       "TH169_E6_WB1         51:98      145:145          0:0       46:46          0:0   \n",
       "TH171_E1_WB1        45:109       62:105          0:0      58:103          0:0   \n",
       "TH171_E3           171:326      166:337          0:0     109:209          0:0   \n",
       "TH172_E2_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH172_E3               0:0          0:0          0:0         0:0          0:0   \n",
       "TH174_E1_WB1           0:0          0:0          0:0         0:0       98:219   \n",
       "TH178_E2_WB1         45:45        21:48        16:29        6:19        14:29   \n",
       "TH179_E1           104:240          0:0       78:171         0:0       94:193   \n",
       "TH179_E1_WB1         55:94          0:0        45:93         0:0        52:90   \n",
       "TH183_E2_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH187_E2_WB1        56:122       62:125          0:0      62:103          0:0   \n",
       "TH187_E3           113:216      147:297          0:0     172:313          0:0   \n",
       "TH199_E3_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH205_E1_WB1        97:170       87:190          0:0      54:110          0:0   \n",
       "TH205_E2            99:273      136:318          0:0     134:296          0:0   \n",
       "TH208_E2_WB1        48:102       54:122          0:0       42:76          0:0   \n",
       "TH208_E3           156:315      170:337          0:0     111:214          0:0   \n",
       "TH208_E4           185:402      246:497          0:0     157:328          0:0   \n",
       "TH210_E1               0:0          0:0          0:0         0:0          0:0   \n",
       "TH210_E2_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH217_E1_WB1       128:284          0:0       91:198     129:270      130:263   \n",
       "TH218_E3_WB1       132:132       88:191       50:109      61:123       61:113   \n",
       "TH220_E1_WB1         86:86          0:0        60:60         0:0        80:80   \n",
       "TH220_E2           289:289          0:0      234:234         0:0      224:224   \n",
       "TH222_E1_CBC1      106:243      119:259          0:0     107:211          0:0   \n",
       "TH226_E3               0:0          0:0          0:0         0:0          0:0   \n",
       "TH226_E3_WB4           0:0          0:0          0:0         0:0          0:0   \n",
       "TH227_E2_WB1        61:180      120:226          0:0      72:171          0:0   \n",
       "TH231_E1               0:0          0:0          0:0         0:0          0:0   \n",
       "TH231_E4_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH236_E4_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "TH238_E1           173:379      195:470          0:0         0:0          0:0   \n",
       "TH238_E1_WB1        95:169      111:180          0:0         0:0          0:0   \n",
       "TH248_E3_WB1       317:317          0:0      204:204         0:0      262:262   \n",
       "TH266_E1_WB1           0:0          0:0          0:0         0:0          0:0   \n",
       "\n",
       "              TET2_I1762V  TP53_P72R TP53_R273C TP53_Y236C TPR_S960N  \n",
       "sample                                                                \n",
       "TH041_E1_WB1          0:0     63:127        0:0        0:0    70:161  \n",
       "TH067_E3_WB3      522:523    640:640        0:0        0:0       0:0  \n",
       "TH067_E3_WB4      118:118    137:137        0:0        0:0       0:0  \n",
       "TH067_E4          286:286    407:407        0:0        0:0       0:0  \n",
       "TH067_E7          512:512    469:469        0:0        0:0       0:0  \n",
       "TH107_E2_WB2       80:142     73:179        0:0        0:0       0:0  \n",
       "TH116_E2              0:0    448:448        0:0        0:0       0:0  \n",
       "TH116_E4_WB1          0:0    136:136        0:0        0:0       0:0  \n",
       "TH146_E7_WB1          0:0     52:110        0:0        0:0       0:0  \n",
       "TH150_E1_WB1          0:0     81:150        0:0        0:0       0:0  \n",
       "TH153_E6_WB1          0:0    173:413        0:0        0:0       0:0  \n",
       "TH155_E3_WB1        40:94     62:124        0:0        0:0       0:0  \n",
       "TH155_E5           45:261     37:228        0:0      55:71       0:0  \n",
       "TH156_E2_WB1          0:0  1038:1038        0:0        0:0       0:0  \n",
       "TH158_E2_WB3          0:0     92:162        0:0        0:0       0:0  \n",
       "TH169_E2              0:0    411:411        0:0        0:0       0:0  \n",
       "TH169_E4              0:0    469:469        0:0        0:0       0:0  \n",
       "TH169_E6_WB1          0:0    144:144        0:0        0:0       0:0  \n",
       "TH171_E1_WB1          0:0     62:162        0:0        0:0       0:0  \n",
       "TH171_E3              0:0    207:402        0:0        0:0       0:0  \n",
       "TH172_E2_WB1          0:0      43:98        0:0        0:0    60:110  \n",
       "TH172_E3              0:0    186:371        0:0        0:0   159:327  \n",
       "TH174_E1_WB1          0:0    258:509        0:0        0:0       0:0  \n",
       "TH178_E2_WB1          0:0      93:93        0:0        0:0       0:0  \n",
       "TH179_E1          164:337    159:328        0:0        0:0       0:0  \n",
       "TH179_E1_WB1       73:148     68:153        0:0        0:0       0:0  \n",
       "TH183_E2_WB1          0:0    184:184        0:0        0:0       0:0  \n",
       "TH187_E2_WB1          0:0        0:0        0:0        0:0       0:0  \n",
       "TH187_E3              0:0        0:0     68:355        0:0       0:0  \n",
       "TH199_E3_WB1          0:0    407:407        0:0        0:0   178:397  \n",
       "TH205_E1_WB1       97:192     83:210        0:0        0:0       0:0  \n",
       "TH205_E2          202:418    177:401        0:0        0:0       0:0  \n",
       "TH208_E2_WB1      150:150     83:182        0:0        0:0       0:0  \n",
       "TH208_E3          375:377    154:400        0:0        0:0       0:0  \n",
       "TH208_E4          518:518    290:686        0:0        0:0       0:0  \n",
       "TH210_E1              0:0        0:0        0:0        0:0       0:0  \n",
       "TH210_E2_WB1          0:0        0:0        0:0        0:0       0:0  \n",
       "TH217_E1_WB1          0:0    285:567        0:0        0:0       0:0  \n",
       "TH218_E3_WB1       97:217     94:202        0:0        0:0       0:0  \n",
       "TH220_E1_WB1          0:0        0:0        0:0        0:0    62:130  \n",
       "TH220_E2              0:0        0:0        0:0        0:0   125:365  \n",
       "TH222_E1_CBC1         0:0    372:372        0:0        0:0       0:0  \n",
       "TH226_E3          163:322    390:390        0:0        0:0   249:249  \n",
       "TH226_E3_WB4       65:149    181:181        0:0        0:0   144:144  \n",
       "TH227_E2_WB1      150:289    188:338        0:0        0:0       0:0  \n",
       "TH231_E1          167:347    330:330        0:0        0:0       0:0  \n",
       "TH231_E4_WB1       83:149    169:169        0:0        0:0       0:0  \n",
       "TH236_E4_WB1          0:0    468:468        0:0        0:0       0:0  \n",
       "TH238_E1              0:0    290:615        0:0        0:0       0:0  \n",
       "TH238_E1_WB1          0:0    124:239        0:0        0:0       0:0  \n",
       "TH248_E3_WB1          0:0    564:564        0:0        0:0       0:0  \n",
       "TH266_E1_WB1          0:0    179:377        0:0        0:0       0:0  \n",
       "\n",
       "[52 rows x 64 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read in output, from cerebra \n",
    "WES_muts = pd.read_csv('../Data_input/mutation_input/coverage_x_sample_cerebra_WES.csv')\n",
    "WES_muts = WES_muts.set_index('sample')\n",
    "WES_muts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "tumor_exome_samples = []\n",
    "for x in list(WES_muts.index):\n",
    "    if 'WB' not in x and 'CBC' not in x:\n",
    "        tumor_exome_samples.append(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AKT1_E242E</th>\n",
       "      <th>ALK_A1200A</th>\n",
       "      <th>ALK_K1491R</th>\n",
       "      <th>ALK_R1436C</th>\n",
       "      <th>ALK_T1012T</th>\n",
       "      <th>BRAF_G643G</th>\n",
       "      <th>BRAF_V600E</th>\n",
       "      <th>BRCA2_K1132K</th>\n",
       "      <th>BRCA2_N289H</th>\n",
       "      <th>BRCA2_N372H</th>\n",
       "      <th>...</th>\n",
       "      <th>SPTA1_A2365A</th>\n",
       "      <th>SPTA1_K1693Q</th>\n",
       "      <th>SPTA1_L1858V</th>\n",
       "      <th>SPTA1_L958L</th>\n",
       "      <th>SPTA1_P1986P</th>\n",
       "      <th>TET2_I1762V</th>\n",
       "      <th>TP53_P72R</th>\n",
       "      <th>TP53_R273C</th>\n",
       "      <th>TP53_Y236C</th>\n",
       "      <th>TPR_S960N</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH067_E4</th>\n",
       "      <td>172:375</td>\n",
       "      <td>0:0</td>\n",
       "      <td>212:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>147:253</td>\n",
       "      <td>0:0</td>\n",
       "      <td>126:260</td>\n",
       "      <td>...</td>\n",
       "      <td>132:223</td>\n",
       "      <td>0:0</td>\n",
       "      <td>91:201</td>\n",
       "      <td>0:0</td>\n",
       "      <td>115:235</td>\n",
       "      <td>286:286</td>\n",
       "      <td>407:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E7</th>\n",
       "      <td>340:706</td>\n",
       "      <td>0:0</td>\n",
       "      <td>374:611</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>238:353</td>\n",
       "      <td>0:0</td>\n",
       "      <td>116:344</td>\n",
       "      <td>...</td>\n",
       "      <td>263:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>176:292</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:362</td>\n",
       "      <td>512:512</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH116_E2</th>\n",
       "      <td>417:417</td>\n",
       "      <td>0:0</td>\n",
       "      <td>615:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>235:235</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>128:390</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:218</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:245</td>\n",
       "      <td>0:0</td>\n",
       "      <td>448:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155_E5</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:306</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>210:259</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:222</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>140:276</td>\n",
       "      <td>152:372</td>\n",
       "      <td>0:0</td>\n",
       "      <td>108:256</td>\n",
       "      <td>0:0</td>\n",
       "      <td>45:261</td>\n",
       "      <td>37:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:71</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E2</th>\n",
       "      <td>364:364</td>\n",
       "      <td>0:0</td>\n",
       "      <td>195:404</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>116:246</td>\n",
       "      <td>0:0</td>\n",
       "      <td>174:329</td>\n",
       "      <td>129:258</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>99:196</td>\n",
       "      <td>256:256</td>\n",
       "      <td>0:0</td>\n",
       "      <td>170:170</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>411:411</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E4</th>\n",
       "      <td>465:465</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:436</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>117:271</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:346</td>\n",
       "      <td>159:344</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>130:211</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:235</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171_E3</th>\n",
       "      <td>211:490</td>\n",
       "      <td>183:413</td>\n",
       "      <td>470:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>167:313</td>\n",
       "      <td>0:0</td>\n",
       "      <td>172:361</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>171:326</td>\n",
       "      <td>166:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>109:209</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>207:402</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH172_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>178:310</td>\n",
       "      <td>0:0</td>\n",
       "      <td>178:347</td>\n",
       "      <td>0:0</td>\n",
       "      <td>173:373</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:371</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>159:327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>29:129</td>\n",
       "      <td>183:361</td>\n",
       "      <td>0:0</td>\n",
       "      <td>165:336</td>\n",
       "      <td>...</td>\n",
       "      <td>104:240</td>\n",
       "      <td>0:0</td>\n",
       "      <td>78:171</td>\n",
       "      <td>0:0</td>\n",
       "      <td>94:193</td>\n",
       "      <td>164:337</td>\n",
       "      <td>159:328</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH187_E3</th>\n",
       "      <td>294:451</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>197:409</td>\n",
       "      <td>164:260</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>113:216</td>\n",
       "      <td>147:297</td>\n",
       "      <td>0:0</td>\n",
       "      <td>172:313</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>68:355</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205_E2</th>\n",
       "      <td>504:504</td>\n",
       "      <td>0:0</td>\n",
       "      <td>444:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>196:396</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>165:340</td>\n",
       "      <td>0:0</td>\n",
       "      <td>209:419</td>\n",
       "      <td>...</td>\n",
       "      <td>99:273</td>\n",
       "      <td>136:318</td>\n",
       "      <td>0:0</td>\n",
       "      <td>134:296</td>\n",
       "      <td>0:0</td>\n",
       "      <td>202:418</td>\n",
       "      <td>177:401</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E3</th>\n",
       "      <td>384:384</td>\n",
       "      <td>0:0</td>\n",
       "      <td>481:481</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>156:315</td>\n",
       "      <td>170:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>111:214</td>\n",
       "      <td>0:0</td>\n",
       "      <td>375:377</td>\n",
       "      <td>154:400</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E4</th>\n",
       "      <td>612:612</td>\n",
       "      <td>0:0</td>\n",
       "      <td>771:771</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>185:402</td>\n",
       "      <td>246:497</td>\n",
       "      <td>0:0</td>\n",
       "      <td>157:328</td>\n",
       "      <td>0:0</td>\n",
       "      <td>518:518</td>\n",
       "      <td>290:686</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>242:519</td>\n",
       "      <td>0:0</td>\n",
       "      <td>151:351</td>\n",
       "      <td>267:267</td>\n",
       "      <td>0:0</td>\n",
       "      <td>175:311</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220_E2</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>548:802</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>289:289</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:234</td>\n",
       "      <td>0:0</td>\n",
       "      <td>224:224</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>125:365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>150:245</td>\n",
       "      <td>0:0</td>\n",
       "      <td>146:278</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>163:322</td>\n",
       "      <td>390:390</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>249:249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>201:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>171:298</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>227:388</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>167:347</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>677:677</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>286:568</td>\n",
       "      <td>...</td>\n",
       "      <td>173:379</td>\n",
       "      <td>195:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>290:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>18 rows × 64 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         AKT1_E242E ALK_A1200A ALK_K1491R ALK_R1436C ALK_T1012T BRAF_G643G  \\\n",
       "sample                                                                       \n",
       "TH067_E4    172:375        0:0    212:448        0:0        0:0        0:0   \n",
       "TH067_E7    340:706        0:0    374:611        0:0        0:0        0:0   \n",
       "TH116_E2    417:417        0:0    615:615        0:0        0:0        0:0   \n",
       "TH155_E5        0:0        0:0     76:306        0:0        0:0    210:259   \n",
       "TH169_E2    364:364        0:0    195:404        0:0        0:0    116:246   \n",
       "TH169_E4    465:465        0:0    234:436        0:0        0:0    117:271   \n",
       "TH171_E3    211:490    183:413    470:470        0:0        0:0    167:313   \n",
       "TH172_E3        0:0        0:0        0:0        0:0        0:0    178:310   \n",
       "TH179_E1        0:0        0:0        0:0        0:0        0:0        0:0   \n",
       "TH187_E3    294:451        0:0        0:0        0:0        0:0        0:0   \n",
       "TH205_E2    504:504        0:0    444:444        0:0    196:396        0:0   \n",
       "TH208_E3    384:384        0:0    481:481        0:0        0:0        0:0   \n",
       "TH208_E4    612:612        0:0    771:771        0:0        0:0        0:0   \n",
       "TH210_E1        0:0        0:0    242:519        0:0    151:351    267:267   \n",
       "TH220_E2        0:0        0:0    548:802        0:0        0:0        0:0   \n",
       "TH226_E3        0:0        0:0        0:0        0:0        0:0    150:245   \n",
       "TH231_E1        0:0        0:0    201:444        0:0        0:0    171:298   \n",
       "TH238_E1        0:0        0:0    677:677        0:0        0:0        0:0   \n",
       "\n",
       "         BRAF_V600E BRCA2_K1132K BRCA2_N289H BRCA2_N372H  ... SPTA1_A2365A  \\\n",
       "sample                                                    ...                \n",
       "TH067_E4        0:0      147:253         0:0     126:260  ...      132:223   \n",
       "TH067_E7        0:0      238:353         0:0     116:344  ...      263:407   \n",
       "TH116_E2        0:0          0:0     235:235         0:0  ...      128:390   \n",
       "TH155_E5        0:0       55:222         0:0         0:0  ...      140:276   \n",
       "TH169_E2        0:0      174:329     129:258         0:0  ...       99:196   \n",
       "TH169_E4        0:0      186:346     159:344         0:0  ...      130:211   \n",
       "TH171_E3        0:0      172:361         0:0         0:0  ...      171:326   \n",
       "TH172_E3        0:0      178:347         0:0     173:373  ...          0:0   \n",
       "TH179_E1     29:129      183:361         0:0     165:336  ...      104:240   \n",
       "TH187_E3        0:0      197:409     164:260         0:0  ...      113:216   \n",
       "TH205_E2        0:0      165:340         0:0     209:419  ...       99:273   \n",
       "TH208_E3        0:0          0:0         0:0         0:0  ...      156:315   \n",
       "TH208_E4        0:0          0:0         0:0         0:0  ...      185:402   \n",
       "TH210_E1        0:0      175:311         0:0         0:0  ...          0:0   \n",
       "TH220_E2        0:0          0:0         0:0         0:0  ...      289:289   \n",
       "TH226_E3        0:0      146:278         0:0         0:0  ...          0:0   \n",
       "TH231_E1        0:0          0:0         0:0     227:388  ...          0:0   \n",
       "TH238_E1        0:0          0:0         0:0     286:568  ...      173:379   \n",
       "\n",
       "         SPTA1_K1693Q SPTA1_L1858V SPTA1_L958L SPTA1_P1986P TET2_I1762V  \\\n",
       "sample                                                                    \n",
       "TH067_E4          0:0       91:201         0:0      115:235     286:286   \n",
       "TH067_E7          0:0      176:292         0:0      234:362     512:512   \n",
       "TH116_E2          0:0       76:218         0:0       76:245         0:0   \n",
       "TH155_E5      152:372          0:0     108:256          0:0      45:261   \n",
       "TH169_E2      256:256          0:0     170:170          0:0         0:0   \n",
       "TH169_E4      330:330          0:0     234:235          0:0         0:0   \n",
       "TH171_E3      166:337          0:0     109:209          0:0         0:0   \n",
       "TH172_E3          0:0          0:0         0:0          0:0         0:0   \n",
       "TH179_E1          0:0       78:171         0:0       94:193     164:337   \n",
       "TH187_E3      147:297          0:0     172:313          0:0         0:0   \n",
       "TH205_E2      136:318          0:0     134:296          0:0     202:418   \n",
       "TH208_E3      170:337          0:0     111:214          0:0     375:377   \n",
       "TH208_E4      246:497          0:0     157:328          0:0     518:518   \n",
       "TH210_E1          0:0          0:0         0:0          0:0         0:0   \n",
       "TH220_E2          0:0      234:234         0:0      224:224         0:0   \n",
       "TH226_E3          0:0          0:0         0:0          0:0     163:322   \n",
       "TH231_E1          0:0          0:0         0:0          0:0     167:347   \n",
       "TH238_E1      195:470          0:0         0:0          0:0         0:0   \n",
       "\n",
       "         TP53_P72R TP53_R273C TP53_Y236C TPR_S960N  \n",
       "sample                                              \n",
       "TH067_E4   407:407        0:0        0:0       0:0  \n",
       "TH067_E7   469:469        0:0        0:0       0:0  \n",
       "TH116_E2   448:448        0:0        0:0       0:0  \n",
       "TH155_E5    37:228        0:0      55:71       0:0  \n",
       "TH169_E2   411:411        0:0        0:0       0:0  \n",
       "TH169_E4   469:469        0:0        0:0       0:0  \n",
       "TH171_E3   207:402        0:0        0:0       0:0  \n",
       "TH172_E3   186:371        0:0        0:0   159:327  \n",
       "TH179_E1   159:328        0:0        0:0       0:0  \n",
       "TH187_E3       0:0     68:355        0:0       0:0  \n",
       "TH205_E2   177:401        0:0        0:0       0:0  \n",
       "TH208_E3   154:400        0:0        0:0       0:0  \n",
       "TH208_E4   290:686        0:0        0:0       0:0  \n",
       "TH210_E1       0:0        0:0        0:0       0:0  \n",
       "TH220_E2       0:0        0:0        0:0   125:365  \n",
       "TH226_E3   390:390        0:0        0:0   249:249  \n",
       "TH231_E1   330:330        0:0        0:0       0:0  \n",
       "TH238_E1   290:615        0:0        0:0       0:0  \n",
       "\n",
       "[18 rows x 64 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# keep only the samples we have a tumor exome for (i.e. drop the WB ones)\n",
    "WES_muts_tumor = WES_muts.loc[WES_muts.index.isin(tumor_exome_samples)]\n",
    "WES_muts_tumor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the mutation columns (named '<GENE>_<protein change>') into\n",
    "# synonymous vs. non-synonymous lists.\n",
    "found_muts = list(WES_muts_tumor.columns)\n",
    "\n",
    "syno_muts = []\n",
    "non_syno_muts = []\n",
    "\n",
    "for x in found_muts:\n",
    "    # split on the first '_' only, so multi-residue changes such as\n",
    "    # 'EGFR_L747_T751delLREAT' keep their full protein-change string\n",
    "    mut = x.split('_', 1)[1]\n",
    "    # synonymous = same residue on both sides of a purely numeric position\n",
    "    # (e.g. 'A1200A'); comparing only the first/last characters would also\n",
    "    # match indels that happen to start and end with the same letter\n",
    "    if len(mut) > 2 and mut[0] == mut[-1] and mut[1:-1].isdigit():\n",
    "        syno_muts.append(x)\n",
    "    else:\n",
    "        non_syno_muts.append(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "27"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# number of mutation columns that were classified as non-synonymous\n",
    "len(non_syno_muts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ALK_K1491R</th>\n",
       "      <th>ALK_R1436C</th>\n",
       "      <th>BRAF_V600E</th>\n",
       "      <th>BRCA2_N289H</th>\n",
       "      <th>BRCA2_N372H</th>\n",
       "      <th>DROSHA_S321L</th>\n",
       "      <th>EGFR_D1014N</th>\n",
       "      <th>EGFR_K745_A750&gt;T</th>\n",
       "      <th>EGFR_L747_S752delLREATS</th>\n",
       "      <th>EGFR_L747_T751delLREAT</th>\n",
       "      <th>...</th>\n",
       "      <th>ROS1_K2228Q</th>\n",
       "      <th>ROS1_N2240K</th>\n",
       "      <th>ROS1_S2229C</th>\n",
       "      <th>SPTA1_K1693Q</th>\n",
       "      <th>SPTA1_L1858V</th>\n",
       "      <th>TET2_I1762V</th>\n",
       "      <th>TP53_P72R</th>\n",
       "      <th>TP53_R273C</th>\n",
       "      <th>TP53_Y236C</th>\n",
       "      <th>TPR_S960N</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sample</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH067_E4</th>\n",
       "      <td>212:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>126:260</td>\n",
       "      <td>80:219</td>\n",
       "      <td>0:0</td>\n",
       "      <td>19:137</td>\n",
       "      <td>19:137</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>88:182</td>\n",
       "      <td>0:0</td>\n",
       "      <td>90:185</td>\n",
       "      <td>0:0</td>\n",
       "      <td>91:201</td>\n",
       "      <td>286:286</td>\n",
       "      <td>407:407</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067_E7</th>\n",
       "      <td>374:611</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>116:344</td>\n",
       "      <td>144:420</td>\n",
       "      <td>0:0</td>\n",
       "      <td>58:230</td>\n",
       "      <td>58:230</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>152:233</td>\n",
       "      <td>0:0</td>\n",
       "      <td>151:229</td>\n",
       "      <td>0:0</td>\n",
       "      <td>176:292</td>\n",
       "      <td>512:512</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH116_E2</th>\n",
       "      <td>615:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>235:235</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>76:218</td>\n",
       "      <td>0:0</td>\n",
       "      <td>448:448</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155_E5</th>\n",
       "      <td>76:306</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>155:249</td>\n",
       "      <td>0:0</td>\n",
       "      <td>27:164</td>\n",
       "      <td>0:0</td>\n",
       "      <td>27:176</td>\n",
       "      <td>...</td>\n",
       "      <td>131:213</td>\n",
       "      <td>0:0</td>\n",
       "      <td>133:211</td>\n",
       "      <td>152:372</td>\n",
       "      <td>0:0</td>\n",
       "      <td>45:261</td>\n",
       "      <td>37:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:71</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E2</th>\n",
       "      <td>195:404</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>129:258</td>\n",
       "      <td>0:0</td>\n",
       "      <td>55:161</td>\n",
       "      <td>0:0</td>\n",
       "      <td>20:158</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>256:256</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>411:411</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169_E4</th>\n",
       "      <td>234:436</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>159:344</td>\n",
       "      <td>0:0</td>\n",
       "      <td>47:194</td>\n",
       "      <td>0:0</td>\n",
       "      <td>24:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>469:469</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171_E3</th>\n",
       "      <td>470:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>107:206</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>166:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>207:402</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH172_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>173:373</td>\n",
       "      <td>75:159</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>186:371</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>159:327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_E1</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>29:129</td>\n",
       "      <td>0:0</td>\n",
       "      <td>165:336</td>\n",
       "      <td>99:176</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>82:136</td>\n",
       "      <td>0:0</td>\n",
       "      <td>83:137</td>\n",
       "      <td>0:0</td>\n",
       "      <td>78:171</td>\n",
       "      <td>164:337</td>\n",
       "      <td>159:328</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH187_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>164:260</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>147:297</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>68:355</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205_E2</th>\n",
       "      <td>444:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>209:419</td>\n",
       "      <td>0:0</td>\n",
       "      <td>231:495</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>136:318</td>\n",
       "      <td>0:0</td>\n",
       "      <td>202:418</td>\n",
       "      <td>177:401</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E3</th>\n",
       "      <td>481:481</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>100:219</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>170:337</td>\n",
       "      <td>0:0</td>\n",
       "      <td>375:377</td>\n",
       "      <td>154:400</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH208_E4</th>\n",
       "      <td>771:771</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>156:310</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>246:497</td>\n",
       "      <td>0:0</td>\n",
       "      <td>518:518</td>\n",
       "      <td>290:686</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210_E1</th>\n",
       "      <td>242:519</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>90:161</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220_E2</th>\n",
       "      <td>548:802</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>53:198</td>\n",
       "      <td>0:0</td>\n",
       "      <td>53:198</td>\n",
       "      <td>0:0</td>\n",
       "      <td>234:234</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>125:365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226_E3</th>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>14:177</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>163:322</td>\n",
       "      <td>390:390</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>249:249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231_E1</th>\n",
       "      <td>201:444</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>227:388</td>\n",
       "      <td>161:161</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>124:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>127:232</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>167:347</td>\n",
       "      <td>330:330</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238_E1</th>\n",
       "      <td>677:677</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>286:568</td>\n",
       "      <td>228:228</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>...</td>\n",
       "      <td>134:268</td>\n",
       "      <td>0:0</td>\n",
       "      <td>132:263</td>\n",
       "      <td>195:470</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>290:615</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "      <td>0:0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>18 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         ALK_K1491R ALK_R1436C BRAF_V600E BRCA2_N289H BRCA2_N372H  \\\n",
       "sample                                                              \n",
       "TH067_E4    212:448        0:0        0:0         0:0     126:260   \n",
       "TH067_E7    374:611        0:0        0:0         0:0     116:344   \n",
       "TH116_E2    615:615        0:0        0:0     235:235         0:0   \n",
       "TH155_E5     76:306        0:0        0:0         0:0         0:0   \n",
       "TH169_E2    195:404        0:0        0:0     129:258         0:0   \n",
       "TH169_E4    234:436        0:0        0:0     159:344         0:0   \n",
       "TH171_E3    470:470        0:0        0:0         0:0         0:0   \n",
       "TH172_E3        0:0        0:0        0:0         0:0     173:373   \n",
       "TH179_E1        0:0        0:0     29:129         0:0     165:336   \n",
       "TH187_E3        0:0        0:0        0:0     164:260         0:0   \n",
       "TH205_E2    444:444        0:0        0:0         0:0     209:419   \n",
       "TH208_E3    481:481        0:0        0:0         0:0         0:0   \n",
       "TH208_E4    771:771        0:0        0:0         0:0         0:0   \n",
       "TH210_E1    242:519        0:0        0:0         0:0         0:0   \n",
       "TH220_E2    548:802        0:0        0:0         0:0         0:0   \n",
       "TH226_E3        0:0        0:0        0:0         0:0         0:0   \n",
       "TH231_E1    201:444        0:0        0:0         0:0     227:388   \n",
       "TH238_E1    677:677        0:0        0:0         0:0     286:568   \n",
       "\n",
       "         DROSHA_S321L EGFR_D1014N EGFR_K745_A750>T EGFR_L747_S752delLREATS  \\\n",
       "sample                                                                       \n",
       "TH067_E4       80:219         0:0           19:137                  19:137   \n",
       "TH067_E7      144:420         0:0           58:230                  58:230   \n",
       "TH116_E2          0:0         0:0              0:0                     0:0   \n",
       "TH155_E5      155:249         0:0           27:164                     0:0   \n",
       "TH169_E2       55:161         0:0           20:158                     0:0   \n",
       "TH169_E4       47:194         0:0           24:228                     0:0   \n",
       "TH171_E3      107:206         0:0              0:0                     0:0   \n",
       "TH172_E3       75:159         0:0              0:0                     0:0   \n",
       "TH179_E1       99:176         0:0              0:0                     0:0   \n",
       "TH187_E3          0:0         0:0              0:0                     0:0   \n",
       "TH205_E2          0:0     231:495              0:0                     0:0   \n",
       "TH208_E3      100:219         0:0              0:0                     0:0   \n",
       "TH208_E4      156:310         0:0              0:0                     0:0   \n",
       "TH210_E1          0:0         0:0              0:0                     0:0   \n",
       "TH220_E2          0:0         0:0              0:0                     0:0   \n",
       "TH226_E3          0:0         0:0           14:177                     0:0   \n",
       "TH231_E1      161:161         0:0              0:0                     0:0   \n",
       "TH238_E1      228:228         0:0              0:0                     0:0   \n",
       "\n",
       "         EGFR_L747_T751delLREAT  ... ROS1_K2228Q ROS1_N2240K ROS1_S2229C  \\\n",
       "sample                           ...                                       \n",
       "TH067_E4                    0:0  ...      88:182         0:0      90:185   \n",
       "TH067_E7                    0:0  ...     152:233         0:0     151:229   \n",
       "TH116_E2                    0:0  ...         0:0         0:0         0:0   \n",
       "TH155_E5                 27:176  ...     131:213         0:0     133:211   \n",
       "TH169_E2                    0:0  ...         0:0         0:0         0:0   \n",
       "TH169_E4                    0:0  ...         0:0         0:0         0:0   \n",
       "TH171_E3                    0:0  ...         0:0         0:0         0:0   \n",
       "TH172_E3                    0:0  ...         0:0         0:0         0:0   \n",
       "TH179_E1                    0:0  ...      82:136         0:0      83:137   \n",
       "TH187_E3                    0:0  ...         0:0         0:0         0:0   \n",
       "TH205_E2                    0:0  ...         0:0         0:0         0:0   \n",
       "TH208_E3                    0:0  ...         0:0         0:0         0:0   \n",
       "TH208_E4                    0:0  ...         0:0         0:0         0:0   \n",
       "TH210_E1                    0:0  ...         0:0      90:161         0:0   \n",
       "TH220_E2                    0:0  ...      53:198         0:0      53:198   \n",
       "TH226_E3                    0:0  ...         0:0         0:0         0:0   \n",
       "TH231_E1                    0:0  ...     124:228         0:0     127:232   \n",
       "TH238_E1                    0:0  ...     134:268         0:0     132:263   \n",
       "\n",
       "         SPTA1_K1693Q SPTA1_L1858V TET2_I1762V TP53_P72R TP53_R273C  \\\n",
       "sample                                                                \n",
       "TH067_E4          0:0       91:201     286:286   407:407        0:0   \n",
       "TH067_E7          0:0      176:292     512:512   469:469        0:0   \n",
       "TH116_E2          0:0       76:218         0:0   448:448        0:0   \n",
       "TH155_E5      152:372          0:0      45:261    37:228        0:0   \n",
       "TH169_E2      256:256          0:0         0:0   411:411        0:0   \n",
       "TH169_E4      330:330          0:0         0:0   469:469        0:0   \n",
       "TH171_E3      166:337          0:0         0:0   207:402        0:0   \n",
       "TH172_E3          0:0          0:0         0:0   186:371        0:0   \n",
       "TH179_E1          0:0       78:171     164:337   159:328        0:0   \n",
       "TH187_E3      147:297          0:0         0:0       0:0     68:355   \n",
       "TH205_E2      136:318          0:0     202:418   177:401        0:0   \n",
       "TH208_E3      170:337          0:0     375:377   154:400        0:0   \n",
       "TH208_E4      246:497          0:0     518:518   290:686        0:0   \n",
       "TH210_E1          0:0          0:0         0:0       0:0        0:0   \n",
       "TH220_E2          0:0      234:234         0:0       0:0        0:0   \n",
       "TH226_E3          0:0          0:0     163:322   390:390        0:0   \n",
       "TH231_E1          0:0          0:0     167:347   330:330        0:0   \n",
       "TH238_E1      195:470          0:0         0:0   290:615        0:0   \n",
       "\n",
       "         TP53_Y236C TPR_S960N  \n",
       "sample                         \n",
       "TH067_E4        0:0       0:0  \n",
       "TH067_E7        0:0       0:0  \n",
       "TH116_E2        0:0       0:0  \n",
       "TH155_E5      55:71       0:0  \n",
       "TH169_E2        0:0       0:0  \n",
       "TH169_E4        0:0       0:0  \n",
       "TH171_E3        0:0       0:0  \n",
       "TH172_E3        0:0   159:327  \n",
       "TH179_E1        0:0       0:0  \n",
       "TH187_E3        0:0       0:0  \n",
       "TH205_E2        0:0       0:0  \n",
       "TH208_E3        0:0       0:0  \n",
       "TH208_E4        0:0       0:0  \n",
       "TH210_E1        0:0       0:0  \n",
       "TH220_E2        0:0   125:365  \n",
       "TH226_E3        0:0   249:249  \n",
       "TH231_E1        0:0       0:0  \n",
       "TH238_E1        0:0       0:0  \n",
       "\n",
       "[18 rows x 27 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): assumes non_syno_muts is a list of column labels, so this picks columns -- confirm\n",
    "WES_muts_tumor_trimmed = WES_muts_tumor[non_syno_muts] # just the non-synonymous mutations\n",
    "WES_muts_tumor_trimmed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# one empty mutation list per patient we have WES data for; the patient ID\n",
    "# is the part of the sample name (the index label) before the first '_'\n",
    "patients_dict_wes = {\n",
    "    sample_name.split('_')[0]: []\n",
    "    for sample_name in WES_muts_tumor_trimmed.index\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fill it in, with the mutations found for each patient\n",
    "#   IMPORTANT -- COMBINING SAMPLES FROM THE SAME PATIENT HERE\n",
    "#\n",
    "# - the sample name is the DataFrame index (it arrives as `idx`), NOT a\n",
    "#   column, so every entry of `row` is a mutation and we scan all of them\n",
    "#   (starting at position 1 silently dropped the first mutation column)\n",
    "# - a mutation seen in several samples of one patient is recorded once,\n",
    "#   in first-seen order\n",
    "# - the dict is rebuilt from scratch so re-running this cell is idempotent\n",
    "patients_dict_wes = {}\n",
    "\n",
    "for idx, row in WES_muts_tumor_trimmed.iterrows():\n",
    "    curr_patient = idx.split('_')[0]  # e.g. 'TH067_E4' -> 'TH067'\n",
    "\n",
    "    # setdefault keeps patients with no mutations at all as an empty list\n",
    "    curr_val = patients_dict_wes.setdefault(curr_patient, [])\n",
    "\n",
    "    # iterate by label; integer `row[i]` lookups on a label-indexed\n",
    "    # Series are deprecated in recent pandas\n",
    "    for curr_mut, curr_ratio in row.items():\n",
    "        # '0:0' is the placeholder treated as 'mutation absent in this sample'\n",
    "        if curr_ratio != '0:0' and curr_mut not in curr_val:\n",
    "            curr_val.append(curr_mut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH067': ['BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_K745_A750>T',\n",
       "  'EGFR_L747_S752delLREATS',\n",
       "  'EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'PIK3CA_I391M',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_L1858V',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R',\n",
       "  'BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_K745_A750>T',\n",
       "  'EGFR_L747_S752delLREATS',\n",
       "  'EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'PIK3CA_I391M',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_L1858V',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R'],\n",
       " 'TH116': ['BRCA2_N289H',\n",
       "  'EGFR_L858R',\n",
       "  'EGFR_R521K',\n",
       "  'RET_T278N',\n",
       "  'SPTA1_L1858V',\n",
       "  'TP53_P72R'],\n",
       " 'TH155': ['DROSHA_S321L',\n",
       "  'EGFR_K745_A750>T',\n",
       "  'EGFR_L747_T751delLREAT',\n",
       "  'EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R',\n",
       "  'TP53_Y236C'],\n",
       " 'TH169': ['BRCA2_N289H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_K745_A750>T',\n",
       "  'EGFR_R521K',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TP53_P72R',\n",
       "  'BRCA2_N289H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_K745_A750>T',\n",
       "  'EGFR_R521K',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TP53_P72R'],\n",
       " 'TH171': ['DROSHA_S321L', 'SPTA1_K1693Q', 'TP53_P72R'],\n",
       " 'TH172': ['BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_R521K',\n",
       "  'PIK3CA_I391M',\n",
       "  'TP53_P72R',\n",
       "  'TPR_S960N'],\n",
       " 'TH179': ['BRAF_V600E',\n",
       "  'BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'RET_G691S',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_L1858V',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R'],\n",
       " 'TH187': ['BRCA2_N289H', 'EGFR_R521K', 'SPTA1_K1693Q', 'TP53_R273C'],\n",
       " 'TH205': ['BRCA2_N372H',\n",
       "  'EGFR_D1014N',\n",
       "  'EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R'],\n",
       " 'TH208': ['DROSHA_S321L',\n",
       "  'EGFR_L861Q',\n",
       "  'EGFR_R521K',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_L861Q',\n",
       "  'EGFR_R521K',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R'],\n",
       " 'TH210': ['PIK3CA_I391M', 'RET_G691S', 'ROS1_N2240K'],\n",
       " 'TH220': ['EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_L1858V',\n",
       "  'TPR_S960N'],\n",
       " 'TH226': ['EGFR_K745_A750>T',\n",
       "  'EGFR_R521K',\n",
       "  'ERBB2_I655V',\n",
       "  'RET_G691S',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R',\n",
       "  'TPR_S960N'],\n",
       " 'TH231': ['BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'TET2_I1762V',\n",
       "  'TP53_P72R'],\n",
       " 'TH238': ['BRCA2_N372H',\n",
       "  'DROSHA_S321L',\n",
       "  'EGFR_R521K',\n",
       "  'RET_G691S',\n",
       "  'ROS1_K2228Q',\n",
       "  'ROS1_S2229C',\n",
       "  'SPTA1_K1693Q',\n",
       "  'TP53_P72R']}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# display the per-patient mutation lists for a visual sanity check\n",
    "patients_dict_wes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#///    i think the fist step is actually converting our old vali_tbl   ///////\n",
    "#///            from by SAMPLE to by PATIENT                            ///////                  \n",
    "#//////////////////////////////////////////////////////////////////////////////"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient</th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>mutations_found</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TH231</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2 ...</td>\n",
       "      <td>291</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TH238_NAT</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>MAP2K2 I220I,</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  E...</td>\n",
       "      <td>51</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TH226</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA4...</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TH223</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR T903T, SMARCA4 G394W, KEAP1 M161I,</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>SMARCA4 P153fs*150, BRAF V600E, STK11 R106R,  ...</td>\n",
       "      <td>55</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 L471L,</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>TH210</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>MAP2K2 I220I,</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>TH169</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M5...</td>\n",
       "      <td>177</td>\n",
       "      <td>24</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>TH222</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>TH238</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V1...</td>\n",
       "      <td>240</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>TH205</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>EGFR L858R,</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>TH220</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--E...</td>\n",
       "      <td>132</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>TH179_NAT</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H,  ...</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>TH248</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>SMARCA4 L6F,  EGFR K745_A750&gt;T,  TP53 L52P, E...</td>\n",
       "      <td>305</td>\n",
       "      <td>14</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>TH158</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>TH227</td>\n",
       "      <td>ALK</td>\n",
       "      <td>intron 19 rearrangement</td>\n",
       "      <td>NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N,</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTP...</td>\n",
       "      <td>962</td>\n",
       "      <td>198</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>TH222</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 W497*, PTPRT A1266A,</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>KEAP1 Y537Y,</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>TH226</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR G42D,  SMARCA4 P153fs*150, KEAP1 A95S, N...</td>\n",
       "      <td>187</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>NFE2L2 T80A, DROSHA E1363D, SMARCA4 A948V,  ST...</td>\n",
       "      <td>63</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>TH218</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>KEAP1 V155A,</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>EGFR T629T,  KEAP1 L471L,  EGFR R521K,</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>TH171</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MAP...</td>\n",
       "      <td>634</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>SMARCA4 P153fs*150, KEAP1 G417E,</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>TH236</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2L...</td>\n",
       "      <td>107</td>\n",
       "      <td>19</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>TH248</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>TP53 V175L,  EGFR K745_A750&gt;T,  MAP2K2 R299M, ...</td>\n",
       "      <td>71</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>TH231</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>NFE2L2 G31E,</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>TH225</td>\n",
       "      <td>KRAS</td>\n",
       "      <td>G12C</td>\n",
       "      <td>EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR ...</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>TH146</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>ROS1-CD74</td>\n",
       "      <td>ROS1--CD74 fusion,</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>TH067</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53 ...</td>\n",
       "      <td>101</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>TH266</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  T...</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>TH217</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>TH218</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>TH155</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>TH169</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>TH158</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      patient driver_gene          driver_mutation  \\\n",
       "0       TH231         ALK                   fusion   \n",
       "1   TH238_NAT        BRAF                    V600E   \n",
       "2       TH103         ALK                   fusion   \n",
       "3       TH226        EGFR                    del19   \n",
       "4       TH223        EGFR                    del19   \n",
       "5       TH179        BRAF                    V600E   \n",
       "6       TH103         ALK                   fusion   \n",
       "7       TH210         ALK                   fusion   \n",
       "8       TH169        EGFR                    del19   \n",
       "9       TH222        ROS1                   fusion   \n",
       "10      TH238        BRAF                    V600E   \n",
       "11      TH205        EGFR                    L858R   \n",
       "12      TH220         ALK                   fusion   \n",
       "13  TH179_NAT        BRAF                    V600E   \n",
       "14      TH248        EGFR                    del19   \n",
       "15      TH158        EGFR                    del19   \n",
       "16      TH227         ALK  intron 19 rearrangement   \n",
       "17      TH185        EGFR                    L858R   \n",
       "18      TH222        ROS1                   fusion   \n",
       "19      TH185        EGFR                    L858R   \n",
       "20      TH226        EGFR                    del19   \n",
       "21      TH179        BRAF                    V600E   \n",
       "22      TH218        EGFR                    L858R   \n",
       "23      TH103         ALK                   fusion   \n",
       "24      TH171         ALK                   fusion   \n",
       "25      TH179        BRAF                    V600E   \n",
       "26      TH236        EGFR                    del19   \n",
       "27      TH248        EGFR                    del19   \n",
       "28      TH231         ALK                   fusion   \n",
       "29      TH225        KRAS                     G12C   \n",
       "30      TH146        ROS1                ROS1-CD74   \n",
       "31      TH067        EGFR                    del19   \n",
       "32      TH266         ALK                   fusion   \n",
       "33      TH217        EGFR                    del19   \n",
       "34      TH218        EGFR                    L858R   \n",
       "35      TH155        EGFR                    del19   \n",
       "36      TH169        EGFR                    del19   \n",
       "37      TH185        EGFR                    L858R   \n",
       "38      TH158        EGFR                    del19   \n",
       "\n",
       "                                      mutations_found  numTumorCells  \\\n",
       "0   KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2 ...            291   \n",
       "1                                      MAP2K2 I220I,               3   \n",
       "2   DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  E...             51   \n",
       "3    EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA4...             14   \n",
       "4            EGFR T903T, SMARCA4 G394W, KEAP1 M161I,               6   \n",
       "5   SMARCA4 P153fs*150, BRAF V600E, STK11 R106R,  ...             55   \n",
       "6                                       KEAP1 L471L,               1   \n",
       "7                                      MAP2K2 I220I,              15   \n",
       "8   STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M5...            177   \n",
       "9                                                 NaN              1   \n",
       "10  KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V1...            240   \n",
       "11                                       EGFR L858R,              24   \n",
       "12  ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--E...            132   \n",
       "13  PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H,  ...              7   \n",
       "14   SMARCA4 L6F,  EGFR K745_A750>T,  TP53 L52P, E...            305   \n",
       "15                                                NaN              7   \n",
       "16       NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N,              16   \n",
       "17  PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTP...            962   \n",
       "18                        KEAP1 W497*, PTPRT A1266A,               6   \n",
       "19                                      KEAP1 Y537Y,               2   \n",
       "20   EGFR G42D,  SMARCA4 P153fs*150, KEAP1 A95S, N...            187   \n",
       "21  NFE2L2 T80A, DROSHA E1363D, SMARCA4 A948V,  ST...             63   \n",
       "22                                      KEAP1 V155A,              16   \n",
       "23            EGFR T629T,  KEAP1 L471L,  EGFR R521K,              14   \n",
       "24  TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MAP...            634   \n",
       "25                  SMARCA4 P153fs*150, KEAP1 G417E,              15   \n",
       "26  MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2L...            107   \n",
       "27  TP53 V175L,  EGFR K745_A750>T,  MAP2K2 R299M, ...             71   \n",
       "28                                      NFE2L2 G31E,               2   \n",
       "29   EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR ...             21   \n",
       "30                                ROS1--CD74 fusion,               3   \n",
       "31  MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53 ...            101   \n",
       "32   TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  T...             28   \n",
       "33                                                NaN              5   \n",
       "34                                                NaN              1   \n",
       "35                                                NaN             17   \n",
       "36                                                NaN              2   \n",
       "37                                                NaN              1   \n",
       "38                                                NaN              4   \n",
       "\n",
       "    numTumorCells_w_coverage_to_ROI  numTumorCells_clinMut_found  \n",
       "0                                 0                            0  \n",
       "1                                 0                            0  \n",
       "2                                 0                            5  \n",
       "3                                 1                            1  \n",
       "4                                 0                            0  \n",
       "5                                 3                            3  \n",
       "6                                 0                            0  \n",
       "7                                 0                            0  \n",
       "8                                24                           23  \n",
       "9                                 0                            0  \n",
       "10                                2                            2  \n",
       "11                                1                            1  \n",
       "12                                0                            3  \n",
       "13                                0                            0  \n",
       "14                               14                           13  \n",
       "15                                0                            0  \n",
       "16                                0                            0  \n",
       "17                              198                           10  \n",
       "18                                0                            0  \n",
       "19                                0                            0  \n",
       "20                                9                            9  \n",
       "21                                0                            0  \n",
       "22                                0                            0  \n",
       "23                                0                            0  \n",
       "24                                0                            3  \n",
       "25                                0                            0  \n",
       "26                               19                           19  \n",
       "27                                6                            5  \n",
       "28                                0                            0  \n",
       "29                               15                           15  \n",
       "30                                0                            3  \n",
       "31                                0                            0  \n",
       "32                                0                            2  \n",
       "33                                0                            0  \n",
       "34                                0                            0  \n",
       "35                                0                            0  \n",
       "36                                0                            0  \n",
       "37                                0                            0  \n",
       "38                                0                            0  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read the validation table and discard its 'sample' column in one chain\n",
    "vali_tbl = (\n",
    "    pd.read_csv('../Data_input/mutation_input/validationTable_samples_tumor_only_4.19.19.csv')\n",
    "    .drop(columns=['sample'])\n",
    ")\n",
    "vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# start every patient seen in vali_tbl off with an empty list\n",
    "patients_dict_sc = {}\n",
    "\n",
    "# walk the patient column directly; a repeated patient just gets a fresh empty list again\n",
    "for patient in vali_tbl.patient:\n",
    "    patients_dict_sc[patient] = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# now fill in with muts\n",
    "# Append each row's `mutations_found` value to the list stored for that row's\n",
    "# patient. Rows without a value contribute NaN, so a list ends up with one\n",
    "# entry per row of that patient.\n",
    "# NOTE: re-running this cell appends a second time; re-run the cell that\n",
    "# creates patients_dict_sc first to start from empty lists.\n",
    "for curr_patient, curr_muts in zip(vali_tbl.patient, vali_tbl.mutations_found):\n",
    "    # Key on the current row's patient only. Writing the list back under any\n",
    "    # other name makes two patients share one list object, so later appends\n",
    "    # for one of them silently show up under the other.\n",
    "    patients_dict_sc.setdefault(curr_patient, []).append(curr_muts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 351,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH231': ['KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2 P472P, TP53 P105P, KEAP1 L231V, MAP2K2 R297R, TP53 G304G,  SOX2 G144R, NKX21 P155fs*43, SMARCA4 Q544P, SOX2 G219V, STK11 K424R, KEAP1 A95S, KEAP1 S592R, MAP2K2 P22S, NFE2L2 N513I,  SMARCA4 P109L, RAD21 A410S, KEAP1 Q284*, NFE2L2 K506N, MAP2K2 Q114*,  SMARCA4 G1162C, MAP2K2 N177T, MYCL C253F, KEAP1 W544C, TP53 Y110C, SMARCA4 R521W, KEAP1 Q46*, RAD21 V284F, SMARCA4 P153fs*150, SMARCA4 A509S, TP53 F155V, KRAS A146P,  NFE2L2 S597fs*>8, PTPRT Q479E,  EGFR L303L,  PTPRT R1368*, KEAP1 S73I, KEAP1 G364D,  RB1 C278R, SMARCA4 L6F, SMARCA4 A948V, PTPRT T329N, PTPRT G276D, BRAF S679N, ',\n",
       "  'NFE2L2 G31E, '],\n",
       " 'TH238_NAT': ['MAP2K2 I220I, '],\n",
       " 'TH103': ['DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  EGFR R521K,  ALK--EML4 fusion,  EGFR T629T,  TP53 E180K,  EGFR Q787Q, HIF1A A612T,  NFE2L2 V417L, KEAP1 A191T, ',\n",
       "  'KEAP1 L471L, ',\n",
       "  ' EGFR T629T,  KEAP1 L471L,  EGFR R521K, '],\n",
       " 'TH226': [' EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA4 G394W, SMARCA4 A509S, KRAS Q61H,  EGFR G42D, ',\n",
       "  ' EGFR G42D,  SMARCA4 P153fs*150, KEAP1 A95S, NFE2L2 E82D, PTPN13 D1736Y,  EGFR E746_A750delELREA, TP53 L111Q, NFE2L2 H416Y, TP53 Q133E, MAP2K2 L37L, SMARCA4 R521W, NFE2L2 D29H, MAP2K2 R297R, NFE2L2 K506N, NKX21 P155fs*43,  MAP2K2 S479R, MAP2K2 R299M, RAD21 D95H, EGFR del19, NKX21 S79L, SOX2 G219V,  EGFR R1100S, KEAP1 C196F, BRAF R682W, KEAP1 W544C, TP53 Y110C,  SMARCA4 G394W, MAP2K1 Y134C,  KEAP1 D78N, KRAS Q61L, KEAP1 G527F, KEAP1 M503K, KEAP1 R272H, KEAP1 R483H, KEAP1 V271M, KEAP1 V369L,  TP53 Y73C, RAD21 A410A, '],\n",
       " 'TH223': [' EGFR T903T, SMARCA4 G394W, KEAP1 M161I, '],\n",
       " 'TH179': ['SMARCA4 P153fs*150, BRAF V600E, STK11 R106R,  TP53 R337C, PTPRT T603K, NFE2L2 L266F, KEAP1 R116W, KEAP1 G195V, TP53 D281E, SMARCA4 A509S, MAP2K2 I220I, PTPN13 E283E, ',\n",
       "  'NFE2L2 T80A, DROSHA E1363D, SMARCA4 A948V,  STK11 I303M, DDR2 F665F, TP53 A7V, KEAP1 S243C, MAP2K2 F338Y, SMARCA4 L6F, DROSHA R1342L, KEAP1 R554Q, TP53 V175L, ERBB4 P800S, TP53 V73fs*50,  KEAP1 A95S, NFE2L2 S414N, RB1 N186fs*6, TP53 V157F, SMARCA4 P153fs*150, MAP2K2 P22S, MAP2K2 D341N, TP53 R123W, ',\n",
       "  ' SMARCA4 P153fs*150, KEAP1 G417E, '],\n",
       " 'TH210': ['MAP2K2 I220I, '],\n",
       " 'TH169': ['STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M503K,  TP53 C9*, EGFR del19, SMARCA4 E1339*, BRAF A762E,  TP53 C44Y,  NKX21 C244C, TP53 R282W,  TP53 R273H, KEAP1 P318L, STK11 K535M, SMARCA4 Q1195H, KEAP1 Y537Y,  TP53 C275F, KEAP1 D479G,  TP53 A276D, NKX21 S79L, SMARCA4 R521W, TP53 P47fs*76,  RAD21 D95H, TP53 Q12*, ',\n",
       "  nan],\n",
       " 'TH222': [nan, 'KEAP1 W497*, PTPRT A1266A, '],\n",
       " 'TH238': ['KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V155A,  EGFR S921R, KEAP1 Q75*, KEAP1 R204P, MAP2K2 P109P, STK11 L386L, KRAS G12F, SMARCA4 P153fs*150, SMARCA4 R521P,  TP53 P105P, KEAP1 E117K,  EGFR G42D, BRAF V600E,  TP53 R168K, TP53 R283P, KEAP1 V167F, TP53 F155V,  NKX21 P155fs*43, AKT1 L335L,  TP53 H193L, MAP2K1 S331R, SMARCA4 F1052L,  KEAP1 G419W, ERBB4 R47G,  STK11 G242W, NOTCH1 P2415delP, STK11 Q152*,  TP53 G56G, KEAP1 G430S,  PTPN13 S145F, SMARCA4 R521W, KEAP1 G332C, NOTCH1 G2345V, AKT1 R465C,  KEAP1 P278S, STK11 G294C, STK11 C199F, NFE2L2 H416Y,  SMARCA4 Q331*, TP53 E103K,  RB1 L199fs*2, STK11 I642M, TP53 R123W,  SMARCA4 A1448V,  EGFR F856L, KEAP1 R320W,  TP53 G154fs*16, SMARCA4 H52P, MAP2K1 F53L, PTPRT E318*,  SMARCA4 E1612*,  STK11 D194Y, KEAP1 R116W, NOTCH1 P284P,  EGFR D1014N, KEAP1 G423V, '],\n",
       " 'TH205': [' EGFR L858R, '],\n",
       " 'TH220': ['ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--EML4 fusion,  STK11 R524S, KEAP1 G417E, TP53 P105P, RAD21 L372L, ALK H1475Q, KEAP1 R272H, TP53 V175L, NFE2L2 E79Q, KEAP1 M110V, ALK K321R, RB1 K548fs*3, RAD21 A410A,  SMARCA4 A1249S,  EGFR Q1020H, KEAP1 M503K, ERBB2 Q943*, KRAS G13D,  TP53 T284P, NKX21 T38M,  SMARCA4 A1448V, AKT1 L335L, '],\n",
       " 'TH179_NAT': ['PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H,  EGFR Q787Q, MAP2K2 I220I,  EGFR N158N, '],\n",
       " 'TH248': [' SMARCA4 L6F,  EGFR K745_A750>T,  TP53 L52P, EGFR del19, AKT1 L335L,  ERBB2 V308M, KEAP1 R204P, NFE2L2 S597fs*>8,  MAP2K1 A76S, HIF1A A612T, SMARCA4 P153fs*150, TP53 V175L, MAP2K2 F338L,  EGFR V843L,  EGFR S811F, SMARCA4 A509S, ERBB2 R128Q,  NKX21 P155fs*43,  NKX21 T38M,  EGFR L858R, KEAP1 T142M,  KEAP1 R460G, NFE2L2 E82D, TP53 Y110C, SMARCA4 G394W, STK11 K424R, AKT1 R465C,  KEAP1 Q75*,  TP53 C277F, ERBB2 Y757Y, NFE2L2 H416Y,  SMARCA4 F1052L, SMARCA4 R521W, MAP2K1 P15S, KEAP1 A40V, KRAS L19F, TP53 A7V, MAP2K2 R297R, KEAP1 R483R, STK11 G56V,  TP53 A276D, MAP2K2 P22S, KEAP1 E117K, TP53 N20N, STK11 H514D, ',\n",
       "  nan,\n",
       "  'TP53 V175L,  EGFR K745_A750>T,  MAP2K2 R299M, KEAP1 R116W, TP53 L52P, RAD21 P510T,  TP53 P105P,  STK11 G174R,  EGFR I91V, EGFR del19, NKX21 P155fs*43, KEAP1 K323fs*5,  MAP2K1 E73E,  SMARCA4 P153fs*150, SMARCA4 Q331*, TP53 G245R, TP53 G244D, TP53 T153A,  EGFR G652G,  SMARCA4 G1162C, KEAP1 K97N, KEAP1 G527F, '],\n",
       " 'TH158': ['PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTPN13 S1001S,  DROSHA N1255N,  EGFR T903T,  EGFR Q787Q,  TP53 C242S,  TP53 S241F, AKT1 E242E,  EGFR R521K,  EGFR A237V,  EGFR T629T,  TP53 P105P, DDR2 A499A,  HIF1A A612T,  KEAP1 Y537Y,  RAD21 E365Q,  MAP2K2 R299M,  STK11 V113M,  NFE2L2 K506N,  TP53 W91*,  EGFR L1167V,  DROSHA P1102P,  KEAP1 G423V,  KEAP1 A191T,  SMARCA4 G394W,  SMARCA4 G1031V,  NOTCH1 D1698D,  KEAP1 R483R,  RAD21 G221A,  MAP2K2 I220I,  TP53 G56G,  KEAP1 P278S,  TP53 R273C,  NFE2L2 V417L,  TP53 S166*,  TP53 I254S,  KRAS A11A,  EGFR L858R,  KEAP1 T142M,  MAP2K2 R297R, KEAP1 P105S,  TP53 R282W,  PTPN13 G962D,  KEAP1 S592R,  EGFR G42D,  TP53 G304G,  NFE2L2 E79Q,  MAP2K2 P22S,  KEAP1 Q284*,  KEAP1 E611D,  KRAS A146P,  KEAP1 D389Y,  NFE2L2 S414N,  EGFR V843L,  KRAS G13D,  TP53 E339*,  KEAP1 G364C,  TP53 R175H,  KEAP1 D236H,  KEAP1 R415C,  TP53 R49C,  PTPN13 P940P,  KEAP1 G419W,  KEAP1 F246L,  NFE2L2 Q26L,  BRAF L597V,  TP53 E163E,  KEAP1 C196F,  NFE2L2 W24R,  RAD21 L192V,  MAP2K2 F338Y,  TP53 Q12*,  MAP2K1 G128V,  KRAS Q61H,  STK11 Q315Q,  STK11 V425V,  SMARCA4 R521W,  TP53 V31V,  SMARCA4 Q82E,  KEAP1 R234P,  TP53 P153fs*28,  KEAP1 V155A,  MAP2K2 G14C,  KEAP1 R483H,  SMARCA4 R973L,  SMARCA4 F1052L,  STK11 C167R,  TP53 A7V,  EGFR P589L,  TP53 V175L, RAD21 A570S,  TP53 L62F,  NOTCH1 C438Y,  RAD21 P510T,  TP53 G245D,  TP53 V182V,  TP53 Y107*,  KEAP1 R320P,  MAP2K1 M146I,  SMARCA4 A1448V,  EGFR I1093M,  KEAP1 R272H,  NFE2L2 L476L,  KEAP1 E205*,  SMARCA4 A948V,  SMARCA4 E1023Q,  TP53 T125P,  BRAF A762E,  SMARCA4 L1161fs*3,  STK11 A301A, RAD21 A410A, DROSHA E1275*,  MAP2K1 P15S,  SMARCA4 G1162C,  KEAP1 M110I,  ERBB2 Q943*,  NFE2L2 P477S,  ERBB4 A662S,  KEAP1 E117K,  SMARCA4 T1129S,  TP53 G154fs*16,  MAP2K1 A76S,  KRAS G12F,  NFE2L2 H416Y,  NFE2L2 E82D, STK11 R106R,  TP53 I162F,  TP53 R119R,  MAP2K1 G75G,  DROSHA G851V,  TP53 S281C,  TP53 R282R,  KDR T279T,  SMARCA4 A509S,  MAP2K1 C121S,  SMARCA4 E1496*,  KEAP1 D479G,  KEAP1 A331A,  KRAS A146V,  
TP53 A289T,  DDR2 G579W,  NFE2L2 D38H,  NFE2L2 D77H, ',\n",
       "  'KEAP1 Y537Y, ',\n",
       "  nan,\n",
       "  nan],\n",
       " 'TH227': ['NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N, '],\n",
       " 'TH185': ['PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTPN13 S1001S,  DROSHA N1255N,  EGFR T903T,  EGFR Q787Q,  TP53 C242S,  TP53 S241F, AKT1 E242E,  EGFR R521K,  EGFR A237V,  EGFR T629T,  TP53 P105P, DDR2 A499A,  HIF1A A612T,  KEAP1 Y537Y,  RAD21 E365Q,  MAP2K2 R299M,  STK11 V113M,  NFE2L2 K506N,  TP53 W91*,  EGFR L1167V,  DROSHA P1102P,  KEAP1 G423V,  KEAP1 A191T,  SMARCA4 G394W,  SMARCA4 G1031V,  NOTCH1 D1698D,  KEAP1 R483R,  RAD21 G221A,  MAP2K2 I220I,  TP53 G56G,  KEAP1 P278S,  TP53 R273C,  NFE2L2 V417L,  TP53 S166*,  TP53 I254S,  KRAS A11A,  EGFR L858R,  KEAP1 T142M,  MAP2K2 R297R, KEAP1 P105S,  TP53 R282W,  PTPN13 G962D,  KEAP1 S592R,  EGFR G42D,  TP53 G304G,  NFE2L2 E79Q,  MAP2K2 P22S,  KEAP1 Q284*,  KEAP1 E611D,  KRAS A146P,  KEAP1 D389Y,  NFE2L2 S414N,  EGFR V843L,  KRAS G13D,  TP53 E339*,  KEAP1 G364C,  TP53 R175H,  KEAP1 D236H,  KEAP1 R415C,  TP53 R49C,  PTPN13 P940P,  KEAP1 G419W,  KEAP1 F246L,  NFE2L2 Q26L,  BRAF L597V,  TP53 E163E,  KEAP1 C196F,  NFE2L2 W24R,  RAD21 L192V,  MAP2K2 F338Y,  TP53 Q12*,  MAP2K1 G128V,  KRAS Q61H,  STK11 Q315Q,  STK11 V425V,  SMARCA4 R521W,  TP53 V31V,  SMARCA4 Q82E,  KEAP1 R234P,  TP53 P153fs*28,  KEAP1 V155A,  MAP2K2 G14C,  KEAP1 R483H,  SMARCA4 R973L,  SMARCA4 F1052L,  STK11 C167R,  TP53 A7V,  EGFR P589L,  TP53 V175L, RAD21 A570S,  TP53 L62F,  NOTCH1 C438Y,  RAD21 P510T,  TP53 G245D,  TP53 V182V,  TP53 Y107*,  KEAP1 R320P,  MAP2K1 M146I,  SMARCA4 A1448V,  EGFR I1093M,  KEAP1 R272H,  NFE2L2 L476L,  KEAP1 E205*,  SMARCA4 A948V,  SMARCA4 E1023Q,  TP53 T125P,  BRAF A762E,  SMARCA4 L1161fs*3,  STK11 A301A, RAD21 A410A, DROSHA E1275*,  MAP2K1 P15S,  SMARCA4 G1162C,  KEAP1 M110I,  ERBB2 Q943*,  NFE2L2 P477S,  ERBB4 A662S,  KEAP1 E117K,  SMARCA4 T1129S,  TP53 G154fs*16,  MAP2K1 A76S,  KRAS G12F,  NFE2L2 H416Y,  NFE2L2 E82D, STK11 R106R,  TP53 I162F,  TP53 R119R,  MAP2K1 G75G,  DROSHA G851V,  TP53 S281C,  TP53 R282R,  KDR T279T,  SMARCA4 A509S,  MAP2K1 C121S,  SMARCA4 E1496*,  KEAP1 D479G,  KEAP1 A331A,  KRAS A146V,  
TP53 A289T,  DDR2 G579W,  NFE2L2 D38H,  NFE2L2 D77H, ',\n",
       "  'KEAP1 Y537Y, ',\n",
       "  nan,\n",
       "  nan],\n",
       " 'TH218': ['KEAP1 V155A, ', nan],\n",
       " 'TH171': ['TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MAP2K1 R108Q, RAD21 A410A, ALK L363L, MAP2K2 Q126Q, SMARCA4 P153fs*150, NFE2L2 K506N,  EGFR R1100S, MAP2K2 R297R, TP53 R123W,  NFE2L2 G81S,  EGFR Q1020H, TP63 T144T, TP53 P180P, NFE2L2 V417L,  EGFR R831H, DROSHA P1292L, KEAP1 I185N,  NFE2L2 H416Y, KEAP1 G527F, NFE2L2 G31R, RAD21 E98Q, KEAP1 R483C, KEAP1 R483H,  TP53 Y110C, STK11 K62*, KEAP1 R116W,  KEAP1 R272H, KRAS A146V, TP53 R168K, MAP2K2 F338Y, TP63 R244L,  NOTCH1 D2091D,  NFE2L2 S414N, TP53 G56G,  SMARCA4 L6F,  MAP2K2 A265T, MAP2K1 F53L,  STK11 H514D, KEAP1 D422N, KEAP1 F139L, RB1 N186fs*6, NFE2L2 L266F, AKT1 R465C, KEAP1 W544C, RAD21 D95H, MAP2K2 R299M, KEAP1 Q75*,  NFE2L2 L143R, KEAP1 R234P, TP53 V175L, TP53 N20N, TP53 G146W,  ALK--EML4 fusion, KEAP1 G364D, KRAS G13C, KEAP1 Y375H, TP53 A276D, TP53 G154fs*16, KRAS G12A,  EGFR V1142V,  TP53 E258D, NFE2L2 R34G, HIF1A M250I, KEAP1 P318L,  STK11 D613H,  KRAS C118S,  TP53 E72*, RAD21 M254I, NFE2L2 S94*, KEAP1 G423V, STK11 R106R, HIF1A A612T, NFE2L2 D77G, TP63 S184W, PTPN13 E283E, KEAP1 D235A, KRAS C185S, TP53 C149W, NFE2L2 S136F, TP53 L348F, MAP2K2 D341N,  RAD21 K596E, KEAP1 R460S, TP53 G304G, TP53 F155V, ALK A1251D, KEAP1 V123L,  SMARCA4 G1162C, MAP2K2 G303W, STK11 A241P, KEAP1 S73I, TP63 C339F,  RAD21 A570S, TP63 A227S, SMARCA4 Q383*, TP53 W271C,  EGFR G42D, TP63 F156F, KEAP1 D479G, KEAP1 D389Y, TP53 L217L,  TP53 A159P, MAP2K2 P109P, KEAP1 S592R, SMARCA4 E1612*,  KEAP1 V271M, KEAP1 G419W, '],\n",
       " 'TH236': ['MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2L2 K506N,  EGFR E746_A750delELREA,  RAD21 A410A,  EGFR S442I, EGFR del19,  NKX21 P155fs*43, TP53 Q100*,  TP53 P105P, KEAP1 V418L, TP53 Y110C, KEAP1 S592R, KEAP1 G423V, RAD21 M254I, SMARCA4 L6F, NKX21 G164S, KEAP1 Q284*,  NFE2L2 L266F,  NKX21 L395L,  KEAP1 R483H, NKX21 P189P,  MAP2K1 C121C,  STK11 E199*,  SMARCA4 R1192P, '],\n",
       " 'TH225': [' EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR T903T,  SMARCA4 H508H,  KEAP1 S592R,  DDR2 L420L,  EGFR T629T,  NKX21 C244C,  TP53 F155V,  TP53 T125T,  DROSHA E1193*,  PTPN13 S1001S,  AKT1 E242E,  TP53 R175H,  NFE2L2 S597fs*>8, '],\n",
       " 'TH146': [' ROS1--CD74 fusion, '],\n",
       " 'TH067': ['MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53 R123W, KEAP1 D389Y, KRAS G13V, KEAP1 Q75*, KDR A1050V, TP53 L157V, KDR E489G,  KDR T279T, DROSHA E1193*, KEAP1 M161I, MAP2K2 D341N, KEAP1 C171F, TP53 R213*, STK11 G58R, KEAP1 N397delN,  EGFR F856L, KEAP1 S592R, '],\n",
       " 'TH266': [' TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  TP53 A7V, NOTCH1 A2019P, NFE2L2 P477S, MAP2K2 R297R,  TP63 A227S, KEAP1 K323fs*5, SMARCA4 P153fs*150, DDR2 G774G, KEAP1 T142M,  STK11 C167R, '],\n",
       " 'TH217': [nan],\n",
       " 'TH155': [nan]}"
      ]
     },
     "execution_count": 351,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the per-patient lists built from `mutations_found` (nan = row had no value).\n",
    "# NOTE(review): the saved output lists TH158 and TH185 with identical contents,\n",
    "# which does not match their rows in vali_tbl -- verify after a fresh Run All.\n",
    "patients_dict_sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'TH205', 'TH169', 'TH067', 'TH155', 'TH171', 'TH210', 'TH238', 'TH179', 'TH220', 'TH231', 'TH226'}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# patients present in both the single-cell and WES dicts (only 11...not great)\n",
    "sc_wes_overlap = set(patients_dict_sc.keys()) & set(patients_dict_wes.keys())\n",
    "print(sc_wes_overlap)\n",
    "len(sc_wes_overlap)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>mutations_found</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH238_NAT</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH225</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH146</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH236</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH227</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH218</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH185</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH222</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH158</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH223</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_NAT</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH103</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH217</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH266</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH248</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          driver_gene driver_mutation mutations_found numTumorCells  \\\n",
       "TH238_NAT         NaN             NaN             NaN           NaN   \n",
       "TH225             NaN             NaN             NaN           NaN   \n",
       "TH146             NaN             NaN             NaN           NaN   \n",
       "TH236             NaN             NaN             NaN           NaN   \n",
       "TH171             NaN             NaN             NaN           NaN   \n",
       "TH210             NaN             NaN             NaN           NaN   \n",
       "TH227             NaN             NaN             NaN           NaN   \n",
       "TH218             NaN             NaN             NaN           NaN   \n",
       "TH185             NaN             NaN             NaN           NaN   \n",
       "TH222             NaN             NaN             NaN           NaN   \n",
       "TH179             NaN             NaN             NaN           NaN   \n",
       "TH158             NaN             NaN             NaN           NaN   \n",
       "TH231             NaN             NaN             NaN           NaN   \n",
       "TH226             NaN             NaN             NaN           NaN   \n",
       "TH155             NaN             NaN             NaN           NaN   \n",
       "TH223             NaN             NaN             NaN           NaN   \n",
       "TH220             NaN             NaN             NaN           NaN   \n",
       "TH179_NAT         NaN             NaN             NaN           NaN   \n",
       "TH103             NaN             NaN             NaN           NaN   \n",
       "TH205             NaN             NaN             NaN           NaN   \n",
       "TH169             NaN             NaN             NaN           NaN   \n",
       "TH067             NaN             NaN             NaN           NaN   \n",
       "TH217             NaN             NaN             NaN           NaN   \n",
       "TH266             NaN             NaN             NaN           NaN   \n",
       "TH238             NaN             NaN             NaN           NaN   \n",
       "TH248             NaN             NaN             NaN           NaN   \n",
       "\n",
       "          numTumorCells_w_coverage_to_ROI numTumorCells_clinMut_found  \n",
       "TH238_NAT                             NaN                         NaN  \n",
       "TH225                                 NaN                         NaN  \n",
       "TH146                                 NaN                         NaN  \n",
       "TH236                                 NaN                         NaN  \n",
       "TH171                                 NaN                         NaN  \n",
       "TH210                                 NaN                         NaN  \n",
       "TH227                                 NaN                         NaN  \n",
       "TH218                                 NaN                         NaN  \n",
       "TH185                                 NaN                         NaN  \n",
       "TH222                                 NaN                         NaN  \n",
       "TH179                                 NaN                         NaN  \n",
       "TH158                                 NaN                         NaN  \n",
       "TH231                                 NaN                         NaN  \n",
       "TH226                                 NaN                         NaN  \n",
       "TH155                                 NaN                         NaN  \n",
       "TH223                                 NaN                         NaN  \n",
       "TH220                                 NaN                         NaN  \n",
       "TH179_NAT                             NaN                         NaN  \n",
       "TH103                                 NaN                         NaN  \n",
       "TH205                                 NaN                         NaN  \n",
       "TH169                                 NaN                         NaN  \n",
       "TH067                                 NaN                         NaN  \n",
       "TH217                                 NaN                         NaN  \n",
       "TH266                                 NaN                         NaN  \n",
       "TH238                                 NaN                         NaN  \n",
       "TH248                                 NaN                         NaN  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build an empty per-patient table: one row per distinct patient, with every\n",
    "# vali_tbl column except 'patient' (which becomes the index).\n",
    "# Series.unique() keeps first-appearance order, so the row order is the same\n",
    "# on every kernel restart; a set of strings iterates in hash order, which can\n",
    "# differ between interpreter sessions.\n",
    "patient_ids = vali_tbl['patient'].unique()\n",
    "new_cols = [col for col in vali_tbl.columns if col != 'patient']\n",
    "patient_vali_tbl = pd.DataFrame(index=patient_ids, columns=new_cols)\n",
    "patient_vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>muts_found_sc</th>\n",
       "      <th>muts_found_WES</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH238_NAT</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH225</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH146</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH236</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH227</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH218</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH185</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH222</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH158</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH223</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_NAT</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH103</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH217</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH266</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH248</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          driver_gene driver_mutation muts_found_sc muts_found_WES  \\\n",
       "TH238_NAT         NaN             NaN           NaN                  \n",
       "TH225             NaN             NaN           NaN                  \n",
       "TH146             NaN             NaN           NaN                  \n",
       "TH236             NaN             NaN           NaN                  \n",
       "TH171             NaN             NaN           NaN                  \n",
       "TH210             NaN             NaN           NaN                  \n",
       "TH227             NaN             NaN           NaN                  \n",
       "TH218             NaN             NaN           NaN                  \n",
       "TH185             NaN             NaN           NaN                  \n",
       "TH222             NaN             NaN           NaN                  \n",
       "TH179             NaN             NaN           NaN                  \n",
       "TH158             NaN             NaN           NaN                  \n",
       "TH231             NaN             NaN           NaN                  \n",
       "TH226             NaN             NaN           NaN                  \n",
       "TH155             NaN             NaN           NaN                  \n",
       "TH223             NaN             NaN           NaN                  \n",
       "TH220             NaN             NaN           NaN                  \n",
       "TH179_NAT         NaN             NaN           NaN                  \n",
       "TH103             NaN             NaN           NaN                  \n",
       "TH205             NaN             NaN           NaN                  \n",
       "TH169             NaN             NaN           NaN                  \n",
       "TH067             NaN             NaN           NaN                  \n",
       "TH217             NaN             NaN           NaN                  \n",
       "TH266             NaN             NaN           NaN                  \n",
       "TH238             NaN             NaN           NaN                  \n",
       "TH248             NaN             NaN           NaN                  \n",
       "\n",
       "          numTumorCells numTumorCells_w_coverage_to_ROI  \\\n",
       "TH238_NAT           NaN                             NaN   \n",
       "TH225               NaN                             NaN   \n",
       "TH146               NaN                             NaN   \n",
       "TH236               NaN                             NaN   \n",
       "TH171               NaN                             NaN   \n",
       "TH210               NaN                             NaN   \n",
       "TH227               NaN                             NaN   \n",
       "TH218               NaN                             NaN   \n",
       "TH185               NaN                             NaN   \n",
       "TH222               NaN                             NaN   \n",
       "TH179               NaN                             NaN   \n",
       "TH158               NaN                             NaN   \n",
       "TH231               NaN                             NaN   \n",
       "TH226               NaN                             NaN   \n",
       "TH155               NaN                             NaN   \n",
       "TH223               NaN                             NaN   \n",
       "TH220               NaN                             NaN   \n",
       "TH179_NAT           NaN                             NaN   \n",
       "TH103               NaN                             NaN   \n",
       "TH205               NaN                             NaN   \n",
       "TH169               NaN                             NaN   \n",
       "TH067               NaN                             NaN   \n",
       "TH217               NaN                             NaN   \n",
       "TH266               NaN                             NaN   \n",
       "TH238               NaN                             NaN   \n",
       "TH248               NaN                             NaN   \n",
       "\n",
       "          numTumorCells_clinMut_found  \n",
       "TH238_NAT                         NaN  \n",
       "TH225                             NaN  \n",
       "TH146                             NaN  \n",
       "TH236                             NaN  \n",
       "TH171                             NaN  \n",
       "TH210                             NaN  \n",
       "TH227                             NaN  \n",
       "TH218                             NaN  \n",
       "TH185                             NaN  \n",
       "TH222                             NaN  \n",
       "TH179                             NaN  \n",
       "TH158                             NaN  \n",
       "TH231                             NaN  \n",
       "TH226                             NaN  \n",
       "TH155                             NaN  \n",
       "TH223                             NaN  \n",
       "TH220                             NaN  \n",
       "TH179_NAT                         NaN  \n",
       "TH103                             NaN  \n",
       "TH205                             NaN  \n",
       "TH169                             NaN  \n",
       "TH067                             NaN  \n",
       "TH217                             NaN  \n",
       "TH266                             NaN  \n",
       "TH238                             NaN  \n",
       "TH248                             NaN  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Final column layout: the single-cell mutation column is renamed to\n",
    "# 'muts_found_sc' and a WES counterpart sits right next to it.\n",
    "new_cols = [\n",
    "    'driver_gene',\n",
    "    'driver_mutation',\n",
    "    'muts_found_sc',\n",
    "    'muts_found_WES',\n",
    "    'numTumorCells',\n",
    "    'numTumorCells_w_coverage_to_ROI',\n",
    "    'numTumorCells_clinMut_found',\n",
    "]\n",
    "\n",
    "# rename -> add the WES column (empty-string placeholder) -> reorder\n",
    "patient_vali_tbl = (\n",
    "    patient_vali_tbl\n",
    "    .rename(columns={'mutations_found': 'muts_found_sc'})\n",
    "    .assign(muts_found_WES=\"\")\n",
    "    .loc[:, new_cols]\n",
    ")\n",
    "patient_vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Fill each patient's row from vali_tbl and the per-patient mutation dicts.\n",
    "# Iterate over a snapshot of the index labels instead of iterrows(): the row\n",
    "# Series was never read, and pandas advises against modifying a frame while\n",
    "# iterating over it.\n",
    "for idx in list(patient_vali_tbl.index):\n",
    "    # all vali_tbl rows for this patient; driver info is taken from the first one\n",
    "    old_row = vali_tbl[vali_tbl.patient == idx]\n",
    "    \n",
    "    gene = old_row['driver_gene'].iloc[0]\n",
    "    mut = old_row['driver_mutation'].iloc[0]\n",
    "    \n",
    "    patient_vali_tbl.loc[idx, 'driver_gene'] = gene\n",
    "    patient_vali_tbl.loc[idx, 'driver_mutation'] = mut\n",
    "    \n",
    "    # .at (scalar setter) stores the dict value in a single cell as-is;\n",
    "    # patients missing from a dict keep their placeholder value\n",
    "    if idx in patients_dict_sc:\n",
    "        sc_muts = patients_dict_sc[idx]\n",
    "        patient_vali_tbl.at[idx, 'muts_found_sc'] = sc_muts\n",
    "        \n",
    "    if idx in patients_dict_wes:\n",
    "        wes_muts = patients_dict_wes[idx]\n",
    "        patient_vali_tbl.at[idx, 'muts_found_WES'] = wes_muts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>muts_found_sc</th>\n",
       "      <th>muts_found_WES</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH238_NAT</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[MAP2K2 I220I, ]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH225</th>\n",
       "      <td>KRAS</td>\n",
       "      <td>G12C</td>\n",
       "      <td>[ EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH146</th>\n",
       "      <td>ROS1</td>\n",
       "      <td>ROS1-CD74</td>\n",
       "      <td>[ ROS1--CD74 fusion, ]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH236</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MA...</td>\n",
       "      <td>[DROSHA_S321L, SPTA1_K1693Q, TP53_P72R]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[MAP2K2 I220I, ]</td>\n",
       "      <td>[PIK3CA_I391M, RET_G691S, ROS1_N2240K]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH227</th>\n",
       "      <td>ALK</td>\n",
       "      <td>intron 19 rearrangement</td>\n",
       "      <td>[NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N, ]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH218</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[KEAP1 V155A, , nan]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH185</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH222</th>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[nan, KEAP1 W497*, PTPRT A1266A, ]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[SMARCA4 P153fs*150, BRAF V600E, STK11 R106R, ...</td>\n",
       "      <td>[BRAF_V600E, BRCA2_N372H, DROSHA_S321L, RET_G6...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH158</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, ROS1_K2228Q, ROS1_...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA...</td>\n",
       "      <td>[EGFR_K745_A750&gt;T, EGFR_R521K, ERBB2_I655V, RE...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[nan]</td>\n",
       "      <td>[DROSHA_S321L, EGFR_K745_A750&gt;T, EGFR_L747_T75...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH223</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ EGFR T903T, SMARCA4 G394W, KEAP1 M161I, ]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--...</td>\n",
       "      <td>[EGFR_R521K, ERBB2_I655V, ROS1_K2228Q, ROS1_S2...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_NAT</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H, ...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH103</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  ...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[ EGFR L858R, ]</td>\n",
       "      <td>[BRCA2_N372H, EGFR_D1014N, EGFR_R521K, ERBB2_I...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M...</td>\n",
       "      <td>[BRCA2_N289H, DROSHA_S321L, EGFR_K745_A750&gt;T, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, EGFR_K745_A750&gt;T, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH217</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[nan]</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH266</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[ TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  ...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, EGFR_R521K, RET_G6...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH248</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ SMARCA4 L6F,  EGFR K745_A750&gt;T,  TP53 L52P, ...</td>\n",
       "      <td></td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          driver_gene          driver_mutation  \\\n",
       "TH238_NAT        BRAF                    V600E   \n",
       "TH225            KRAS                     G12C   \n",
       "TH146            ROS1                ROS1-CD74   \n",
       "TH236            EGFR                    del19   \n",
       "TH171             ALK                   fusion   \n",
       "TH210             ALK                   fusion   \n",
       "TH227             ALK  intron 19 rearrangement   \n",
       "TH218            EGFR                    L858R   \n",
       "TH185            EGFR                    L858R   \n",
       "TH222            ROS1                   fusion   \n",
       "TH179            BRAF                    V600E   \n",
       "TH158            EGFR                    del19   \n",
       "TH231             ALK                   fusion   \n",
       "TH226            EGFR                    del19   \n",
       "TH155            EGFR                    del19   \n",
       "TH223            EGFR                    del19   \n",
       "TH220             ALK                   fusion   \n",
       "TH179_NAT        BRAF                    V600E   \n",
       "TH103             ALK                   fusion   \n",
       "TH205            EGFR                    L858R   \n",
       "TH169            EGFR                    del19   \n",
       "TH067            EGFR                    del19   \n",
       "TH217            EGFR                    del19   \n",
       "TH266             ALK                   fusion   \n",
       "TH238            BRAF                    V600E   \n",
       "TH248            EGFR                    del19   \n",
       "\n",
       "                                               muts_found_sc  \\\n",
       "TH238_NAT                                   [MAP2K2 I220I, ]   \n",
       "TH225      [ EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR...   \n",
       "TH146                                 [ ROS1--CD74 fusion, ]   \n",
       "TH236      [MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2...   \n",
       "TH171      [TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MA...   \n",
       "TH210                                       [MAP2K2 I220I, ]   \n",
       "TH227         [NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N, ]   \n",
       "TH218                                   [KEAP1 V155A, , nan]   \n",
       "TH185      [PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...   \n",
       "TH222                     [nan, KEAP1 W497*, PTPRT A1266A, ]   \n",
       "TH179      [SMARCA4 P153fs*150, BRAF V600E, STK11 R106R, ...   \n",
       "TH158      [PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...   \n",
       "TH231      [KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2...   \n",
       "TH226      [ EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA...   \n",
       "TH155                                                  [nan]   \n",
       "TH223            [ EGFR T903T, SMARCA4 G394W, KEAP1 M161I, ]   \n",
       "TH220      [ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--...   \n",
       "TH179_NAT  [PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H, ...   \n",
       "TH103      [DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  ...   \n",
       "TH205                                        [ EGFR L858R, ]   \n",
       "TH169      [STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M...   \n",
       "TH067      [MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53...   \n",
       "TH217                                                  [nan]   \n",
       "TH266      [ TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  ...   \n",
       "TH238      [KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V...   \n",
       "TH248      [ SMARCA4 L6F,  EGFR K745_A750>T,  TP53 L52P, ...   \n",
       "\n",
       "                                              muts_found_WES numTumorCells  \\\n",
       "TH238_NAT                                                              NaN   \n",
       "TH225                                                                  NaN   \n",
       "TH146                                                                  NaN   \n",
       "TH236                                                                  NaN   \n",
       "TH171                [DROSHA_S321L, SPTA1_K1693Q, TP53_P72R]           NaN   \n",
       "TH210                 [PIK3CA_I391M, RET_G691S, ROS1_N2240K]           NaN   \n",
       "TH227                                                                  NaN   \n",
       "TH218                                                                  NaN   \n",
       "TH185                                                                  NaN   \n",
       "TH222                                                                  NaN   \n",
       "TH179      [BRAF_V600E, BRCA2_N372H, DROSHA_S321L, RET_G6...           NaN   \n",
       "TH158                                                                  NaN   \n",
       "TH231      [BRCA2_N372H, DROSHA_S321L, ROS1_K2228Q, ROS1_...           NaN   \n",
       "TH226      [EGFR_K745_A750>T, EGFR_R521K, ERBB2_I655V, RE...           NaN   \n",
       "TH155      [DROSHA_S321L, EGFR_K745_A750>T, EGFR_L747_T75...           NaN   \n",
       "TH223                                                                  NaN   \n",
       "TH220      [EGFR_R521K, ERBB2_I655V, ROS1_K2228Q, ROS1_S2...           NaN   \n",
       "TH179_NAT                                                              NaN   \n",
       "TH103                                                                  NaN   \n",
       "TH205      [BRCA2_N372H, EGFR_D1014N, EGFR_R521K, ERBB2_I...           NaN   \n",
       "TH169      [BRCA2_N289H, DROSHA_S321L, EGFR_K745_A750>T, ...           NaN   \n",
       "TH067      [BRCA2_N372H, DROSHA_S321L, EGFR_K745_A750>T, ...           NaN   \n",
       "TH217                                                                  NaN   \n",
       "TH266                                                                  NaN   \n",
       "TH238      [BRCA2_N372H, DROSHA_S321L, EGFR_R521K, RET_G6...           NaN   \n",
       "TH248                                                                  NaN   \n",
       "\n",
       "          numTumorCells_w_coverage_to_ROI numTumorCells_clinMut_found  \n",
       "TH238_NAT                             NaN                         NaN  \n",
       "TH225                                 NaN                         NaN  \n",
       "TH146                                 NaN                         NaN  \n",
       "TH236                                 NaN                         NaN  \n",
       "TH171                                 NaN                         NaN  \n",
       "TH210                                 NaN                         NaN  \n",
       "TH227                                 NaN                         NaN  \n",
       "TH218                                 NaN                         NaN  \n",
       "TH185                                 NaN                         NaN  \n",
       "TH222                                 NaN                         NaN  \n",
       "TH179                                 NaN                         NaN  \n",
       "TH158                                 NaN                         NaN  \n",
       "TH231                                 NaN                         NaN  \n",
       "TH226                                 NaN                         NaN  \n",
       "TH155                                 NaN                         NaN  \n",
       "TH223                                 NaN                         NaN  \n",
       "TH220                                 NaN                         NaN  \n",
       "TH179_NAT                             NaN                         NaN  \n",
       "TH103                                 NaN                         NaN  \n",
       "TH205                                 NaN                         NaN  \n",
       "TH169                                 NaN                         NaN  \n",
       "TH067                                 NaN                         NaN  \n",
       "TH217                                 NaN                         NaN  \n",
       "TH266                                 NaN                         NaN  \n",
       "TH238                                 NaN                         NaN  \n",
       "TH248                                 NaN                         NaN  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
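    "# success!! muts_found_WES now holds WES calls (blank for some patients); the three numTumorCells* columns are still NaN at this point\n",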
    "patient_vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "#//////////////////////////////////////////////////////////////////////////////\n",
    "#//////////////////////////////////////////////////////////////////////////////    \n",
    "#////////////////////    fill in the rest of the cols     ///////////////////// \n",
    "#//////////////////////////////////////////////////////////////////////////////"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH231': 293,\n",
       " 'TH238_NAT': 3,\n",
       " 'TH103': 66,\n",
       " 'TH226': 201,\n",
       " 'TH223': 6,\n",
       " 'TH179': 133,\n",
       " 'TH210': 15,\n",
       " 'TH169': 179,\n",
       " 'TH222': 7,\n",
       " 'TH238': 240,\n",
       " 'TH205': 24,\n",
       " 'TH220': 132,\n",
       " 'TH179_NAT': 7,\n",
       " 'TH248': 376,\n",
       " 'TH158': 11,\n",
       " 'TH227': 16,\n",
       " 'TH185': 965,\n",
       " 'TH218': 17,\n",
       " 'TH171': 634,\n",
       " 'TH236': 107,\n",
       " 'TH225': 21,\n",
       " 'TH146': 3,\n",
       " 'TH067': 101,\n",
       " 'TH266': 28,\n",
       " 'TH217': 5,\n",
       " 'TH155': 17}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-patient total of numTumorCells, as {patient: total}.\n",
    "# A patient can appear on several rows of vali_tbl (e.g. TH103), so group on\n",
    "# patient and sum rather than accumulating by hand with two iterrows() passes.\n",
    "# sort=False keeps the patients in order of first appearance in vali_tbl,\n",
    "# which is the key order the hand-rolled loop produced.\n",
    "# NOTE: sum() skips NaN; numTumorCells is an integer column here, so totals are unchanged.\n",
    "numTumorCells_d = (\n",
    "    vali_tbl\n",
    "    .groupby('patient', sort=False)['numTumorCells']\n",
    "    .sum()\n",
    "    .to_dict()\n",
    ")\n",
    "\n",
    "numTumorCells_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient</th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>mutations_found</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TH231</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2 ...</td>\n",
       "      <td>291</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TH238_NAT</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>MAP2K2 I220I,</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  E...</td>\n",
       "      <td>51</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TH226</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA4...</td>\n",
       "      <td>14</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TH223</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR T903T, SMARCA4 G394W, KEAP1 M161I,</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>SMARCA4 P153fs*150, BRAF V600E, STK11 R106R,  ...</td>\n",
       "      <td>55</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 L471L,</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>TH210</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>MAP2K2 I220I,</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>TH169</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M5...</td>\n",
       "      <td>177</td>\n",
       "      <td>24</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>TH222</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>TH238</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V1...</td>\n",
       "      <td>240</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>TH205</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>EGFR L858R,</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>TH220</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--E...</td>\n",
       "      <td>132</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>TH179_NAT</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H,  ...</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>TH248</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>SMARCA4 L6F,  EGFR K745_A750&gt;T,  TP53 L52P, E...</td>\n",
       "      <td>305</td>\n",
       "      <td>14</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>TH158</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>TH227</td>\n",
       "      <td>ALK</td>\n",
       "      <td>intron 19 rearrangement</td>\n",
       "      <td>NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N,</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTP...</td>\n",
       "      <td>962</td>\n",
       "      <td>198</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>TH222</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>KEAP1 W497*, PTPRT A1266A,</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>KEAP1 Y537Y,</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>TH226</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>EGFR G42D,  SMARCA4 P153fs*150, KEAP1 A95S, N...</td>\n",
       "      <td>187</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>NFE2L2 T80A, DROSHA E1363D, SMARCA4 A948V,  ST...</td>\n",
       "      <td>63</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>TH218</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>KEAP1 V155A,</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>TH103</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>EGFR T629T,  KEAP1 L471L,  EGFR R521K,</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>TH171</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MAP...</td>\n",
       "      <td>634</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>TH179</td>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>SMARCA4 P153fs*150, KEAP1 G417E,</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>TH236</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2L...</td>\n",
       "      <td>107</td>\n",
       "      <td>19</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>TH248</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>TP53 V175L,  EGFR K745_A750&gt;T,  MAP2K2 R299M, ...</td>\n",
       "      <td>71</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>TH231</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>NFE2L2 G31E,</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>TH225</td>\n",
       "      <td>KRAS</td>\n",
       "      <td>G12C</td>\n",
       "      <td>EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR ...</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>TH146</td>\n",
       "      <td>ROS1</td>\n",
       "      <td>ROS1-CD74</td>\n",
       "      <td>ROS1--CD74 fusion,</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>TH067</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53 ...</td>\n",
       "      <td>101</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>TH266</td>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  T...</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>TH217</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>TH218</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>TH155</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>TH169</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>TH185</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>TH158</td>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      patient driver_gene          driver_mutation  \\\n",
       "0       TH231         ALK                   fusion   \n",
       "1   TH238_NAT        BRAF                    V600E   \n",
       "2       TH103         ALK                   fusion   \n",
       "3       TH226        EGFR                    del19   \n",
       "4       TH223        EGFR                    del19   \n",
       "5       TH179        BRAF                    V600E   \n",
       "6       TH103         ALK                   fusion   \n",
       "7       TH210         ALK                   fusion   \n",
       "8       TH169        EGFR                    del19   \n",
       "9       TH222        ROS1                   fusion   \n",
       "10      TH238        BRAF                    V600E   \n",
       "11      TH205        EGFR                    L858R   \n",
       "12      TH220         ALK                   fusion   \n",
       "13  TH179_NAT        BRAF                    V600E   \n",
       "14      TH248        EGFR                    del19   \n",
       "15      TH158        EGFR                    del19   \n",
       "16      TH227         ALK  intron 19 rearrangement   \n",
       "17      TH185        EGFR                    L858R   \n",
       "18      TH222        ROS1                   fusion   \n",
       "19      TH185        EGFR                    L858R   \n",
       "20      TH226        EGFR                    del19   \n",
       "21      TH179        BRAF                    V600E   \n",
       "22      TH218        EGFR                    L858R   \n",
       "23      TH103         ALK                   fusion   \n",
       "24      TH171         ALK                   fusion   \n",
       "25      TH179        BRAF                    V600E   \n",
       "26      TH236        EGFR                    del19   \n",
       "27      TH248        EGFR                    del19   \n",
       "28      TH231         ALK                   fusion   \n",
       "29      TH225        KRAS                     G12C   \n",
       "30      TH146        ROS1                ROS1-CD74   \n",
       "31      TH067        EGFR                    del19   \n",
       "32      TH266         ALK                   fusion   \n",
       "33      TH217        EGFR                    del19   \n",
       "34      TH218        EGFR                    L858R   \n",
       "35      TH155        EGFR                    del19   \n",
       "36      TH169        EGFR                    del19   \n",
       "37      TH185        EGFR                    L858R   \n",
       "38      TH158        EGFR                    del19   \n",
       "\n",
       "                                      mutations_found  numTumorCells  \\\n",
       "0   KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2 ...            291   \n",
       "1                                      MAP2K2 I220I,               3   \n",
       "2   DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  E...             51   \n",
       "3    EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA4...             14   \n",
       "4            EGFR T903T, SMARCA4 G394W, KEAP1 M161I,               6   \n",
       "5   SMARCA4 P153fs*150, BRAF V600E, STK11 R106R,  ...             55   \n",
       "6                                       KEAP1 L471L,               1   \n",
       "7                                      MAP2K2 I220I,              15   \n",
       "8   STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M5...            177   \n",
       "9                                                 NaN              1   \n",
       "10  KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V1...            240   \n",
       "11                                       EGFR L858R,              24   \n",
       "12  ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--E...            132   \n",
       "13  PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H,  ...              7   \n",
       "14   SMARCA4 L6F,  EGFR K745_A750>T,  TP53 L52P, E...            305   \n",
       "15                                                NaN              7   \n",
       "16       NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N,              16   \n",
       "17  PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PTP...            962   \n",
       "18                        KEAP1 W497*, PTPRT A1266A,               6   \n",
       "19                                      KEAP1 Y537Y,               2   \n",
       "20   EGFR G42D,  SMARCA4 P153fs*150, KEAP1 A95S, N...            187   \n",
       "21  NFE2L2 T80A, DROSHA E1363D, SMARCA4 A948V,  ST...             63   \n",
       "22                                      KEAP1 V155A,              16   \n",
       "23            EGFR T629T,  KEAP1 L471L,  EGFR R521K,              14   \n",
       "24  TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MAP...            634   \n",
       "25                  SMARCA4 P153fs*150, KEAP1 G417E,              15   \n",
       "26  MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2L...            107   \n",
       "27  TP53 V175L,  EGFR K745_A750>T,  MAP2K2 R299M, ...             71   \n",
       "28                                      NFE2L2 G31E,               2   \n",
       "29   EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR ...             21   \n",
       "30                                ROS1--CD74 fusion,               3   \n",
       "31  MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53 ...            101   \n",
       "32   TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  T...             28   \n",
       "33                                                NaN              5   \n",
       "34                                                NaN              1   \n",
       "35                                                NaN             17   \n",
       "36                                                NaN              2   \n",
       "37                                                NaN              1   \n",
       "38                                                NaN              4   \n",
       "\n",
       "    numTumorCells_w_coverage_to_ROI  numTumorCells_clinMut_found  \n",
       "0                                 0                            0  \n",
       "1                                 0                            0  \n",
       "2                                 0                            5  \n",
       "3                                 1                            1  \n",
       "4                                 0                            0  \n",
       "5                                 3                            3  \n",
       "6                                 0                            0  \n",
       "7                                 0                            0  \n",
       "8                                24                           23  \n",
       "9                                 0                            0  \n",
       "10                                2                            2  \n",
       "11                                1                            1  \n",
       "12                                0                            3  \n",
       "13                                0                            0  \n",
       "14                               14                           13  \n",
       "15                                0                            0  \n",
       "16                                0                            0  \n",
       "17                              198                           10  \n",
       "18                                0                            0  \n",
       "19                                0                            0  \n",
       "20                                9                            9  \n",
       "21                                0                            0  \n",
       "22                                0                            0  \n",
       "23                                0                            0  \n",
       "24                                0                            3  \n",
       "25                                0                            0  \n",
       "26                               19                           19  \n",
       "27                                6                            5  \n",
       "28                                0                            0  \n",
       "29                               15                           15  \n",
       "30                                0                            3  \n",
       "31                                0                            0  \n",
       "32                                0                            2  \n",
       "33                                0                            0  \n",
       "34                                0                            0  \n",
       "35                                0                            0  \n",
       "36                                0                            0  \n",
       "37                                0                            0  \n",
       "38                                0                            0  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH231': 0,\n",
       " 'TH238_NAT': 0,\n",
       " 'TH103': 0,\n",
       " 'TH226': 10,\n",
       " 'TH223': 0,\n",
       " 'TH179': 3,\n",
       " 'TH210': 0,\n",
       " 'TH169': 24,\n",
       " 'TH222': 0,\n",
       " 'TH238': 2,\n",
       " 'TH205': 1,\n",
       " 'TH220': 0,\n",
       " 'TH179_NAT': 0,\n",
       " 'TH248': 20,\n",
       " 'TH158': 0,\n",
       " 'TH227': 0,\n",
       " 'TH185': 198,\n",
       " 'TH218': 0,\n",
       " 'TH171': 0,\n",
       " 'TH236': 19,\n",
       " 'TH225': 15,\n",
       " 'TH146': 0,\n",
       " 'TH067': 0,\n",
       " 'TH266': 0,\n",
       " 'TH217': 0,\n",
       " 'TH155': 0}"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-patient total of numTumorCells_w_coverage_to_ROI over all vali_tbl rows,\n",
    "# stored as {patient: total}.\n",
    "#\n",
    "# A single groupby replaces the two row-by-row iterrows() passes.\n",
    "# sort=False keeps the patients in order of first appearance in vali_tbl\n",
    "# instead of sorting the keys alphabetically.\n",
    "# NOTE: groupby skips rows whose patient is NaN.\n",
    "numTumorCells_cov_d = (\n",
    "    vali_tbl\n",
    "    .groupby('patient', sort=False)['numTumorCells_w_coverage_to_ROI']\n",
    "    .sum()\n",
    "    .to_dict()\n",
    ")\n",
    "\n",
    "numTumorCells_cov_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'TH231': 0,\n",
       " 'TH238_NAT': 0,\n",
       " 'TH103': 5,\n",
       " 'TH226': 10,\n",
       " 'TH223': 0,\n",
       " 'TH179': 3,\n",
       " 'TH210': 0,\n",
       " 'TH169': 23,\n",
       " 'TH222': 0,\n",
       " 'TH238': 2,\n",
       " 'TH205': 1,\n",
       " 'TH220': 3,\n",
       " 'TH179_NAT': 0,\n",
       " 'TH248': 18,\n",
       " 'TH158': 0,\n",
       " 'TH227': 0,\n",
       " 'TH185': 10,\n",
       " 'TH218': 0,\n",
       " 'TH171': 3,\n",
       " 'TH236': 19,\n",
       " 'TH225': 15,\n",
       " 'TH146': 3,\n",
       " 'TH067': 0,\n",
       " 'TH266': 2,\n",
       " 'TH217': 0,\n",
       " 'TH155': 0}"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-patient total of numTumorCells_clinMut_found over all vali_tbl rows,\n",
    "# stored as {patient: total}.\n",
    "#\n",
    "# A single groupby replaces the two row-by-row iterrows() passes.\n",
    "# sort=False keeps the patients in order of first appearance in vali_tbl\n",
    "# instead of sorting the keys alphabetically.\n",
    "# NOTE: groupby skips rows whose patient is NaN.\n",
    "numTumorCells_clinMut_d = (\n",
    "    vali_tbl\n",
    "    .groupby('patient', sort=False)['numTumorCells_clinMut_found']\n",
    "    .sum()\n",
    "    .to_dict()\n",
    ")\n",
    "\n",
    "numTumorCells_clinMut_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>driver_gene</th>\n",
       "      <th>driver_mutation</th>\n",
       "      <th>muts_found_sc</th>\n",
       "      <th>muts_found_WES</th>\n",
       "      <th>numTumorCells</th>\n",
       "      <th>numTumorCells_w_coverage_to_ROI</th>\n",
       "      <th>numTumorCells_clinMut_found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TH238_NAT</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[MAP2K2 I220I, ]</td>\n",
       "      <td></td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH225</th>\n",
       "      <td>KRAS</td>\n",
       "      <td>G12C</td>\n",
       "      <td>[ EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR...</td>\n",
       "      <td></td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH146</th>\n",
       "      <td>ROS1</td>\n",
       "      <td>ROS1-CD74</td>\n",
       "      <td>[ ROS1--CD74 fusion, ]</td>\n",
       "      <td></td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH236</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2...</td>\n",
       "      <td></td>\n",
       "      <td>107</td>\n",
       "      <td>19</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH171</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MA...</td>\n",
       "      <td>[DROSHA_S321L, SPTA1_K1693Q, TP53_P72R]</td>\n",
       "      <td>634</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH210</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[MAP2K2 I220I, ]</td>\n",
       "      <td>[PIK3CA_I391M, RET_G691S, ROS1_N2240K]</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH227</th>\n",
       "      <td>ALK</td>\n",
       "      <td>intron 19 rearrangement</td>\n",
       "      <td>[NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N, ]</td>\n",
       "      <td></td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH218</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[KEAP1 V155A, , nan]</td>\n",
       "      <td></td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH185</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...</td>\n",
       "      <td></td>\n",
       "      <td>965</td>\n",
       "      <td>198</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH222</th>\n",
       "      <td>ROS1</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[nan, KEAP1 W497*, PTPRT A1266A, ]</td>\n",
       "      <td></td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[SMARCA4 P153fs*150, BRAF V600E, STK11 R106R, ...</td>\n",
       "      <td>[BRAF_V600E, BRCA2_N372H, DROSHA_S321L, RET_G6...</td>\n",
       "      <td>133</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH158</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...</td>\n",
       "      <td></td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH231</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, ROS1_K2228Q, ROS1_...</td>\n",
       "      <td>293</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH226</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA...</td>\n",
       "      <td>[EGFR_K745_A750&gt;T, EGFR_R521K, ERBB2_I655V, RE...</td>\n",
       "      <td>201</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH155</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[nan]</td>\n",
       "      <td>[DROSHA_S321L, EGFR_K745_A750&gt;T, EGFR_L747_T75...</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH223</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ EGFR T903T, SMARCA4 G394W, KEAP1 M161I, ]</td>\n",
       "      <td></td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH220</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--...</td>\n",
       "      <td>[EGFR_R521K, ERBB2_I655V, ROS1_K2228Q, ROS1_S2...</td>\n",
       "      <td>132</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH179_NAT</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H, ...</td>\n",
       "      <td></td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH103</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  ...</td>\n",
       "      <td></td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH205</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>L858R</td>\n",
       "      <td>[ EGFR L858R, ]</td>\n",
       "      <td>[BRCA2_N372H, EGFR_D1014N, EGFR_R521K, ERBB2_I...</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH169</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M...</td>\n",
       "      <td>[BRCA2_N289H, DROSHA_S321L, EGFR_K745_A750&gt;T, ...</td>\n",
       "      <td>179</td>\n",
       "      <td>24</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH067</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, EGFR_K745_A750&gt;T, ...</td>\n",
       "      <td>101</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH217</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[nan]</td>\n",
       "      <td></td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH266</th>\n",
       "      <td>ALK</td>\n",
       "      <td>fusion</td>\n",
       "      <td>[ TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  ...</td>\n",
       "      <td></td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH238</th>\n",
       "      <td>BRAF</td>\n",
       "      <td>V600E</td>\n",
       "      <td>[KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V...</td>\n",
       "      <td>[BRCA2_N372H, DROSHA_S321L, EGFR_R521K, RET_G6...</td>\n",
       "      <td>240</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TH248</th>\n",
       "      <td>EGFR</td>\n",
       "      <td>del19</td>\n",
       "      <td>[ SMARCA4 L6F,  EGFR K745_A750&gt;T,  TP53 L52P, ...</td>\n",
       "      <td></td>\n",
       "      <td>376</td>\n",
       "      <td>20</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          driver_gene          driver_mutation  \\\n",
       "TH238_NAT        BRAF                    V600E   \n",
       "TH225            KRAS                     G12C   \n",
       "TH146            ROS1                ROS1-CD74   \n",
       "TH236            EGFR                    del19   \n",
       "TH171             ALK                   fusion   \n",
       "TH210             ALK                   fusion   \n",
       "TH227             ALK  intron 19 rearrangement   \n",
       "TH218            EGFR                    L858R   \n",
       "TH185            EGFR                    L858R   \n",
       "TH222            ROS1                   fusion   \n",
       "TH179            BRAF                    V600E   \n",
       "TH158            EGFR                    del19   \n",
       "TH231             ALK                   fusion   \n",
       "TH226            EGFR                    del19   \n",
       "TH155            EGFR                    del19   \n",
       "TH223            EGFR                    del19   \n",
       "TH220             ALK                   fusion   \n",
       "TH179_NAT        BRAF                    V600E   \n",
       "TH103             ALK                   fusion   \n",
       "TH205            EGFR                    L858R   \n",
       "TH169            EGFR                    del19   \n",
       "TH067            EGFR                    del19   \n",
       "TH217            EGFR                    del19   \n",
       "TH266             ALK                   fusion   \n",
       "TH238            BRAF                    V600E   \n",
       "TH248            EGFR                    del19   \n",
       "\n",
       "                                               muts_found_sc  \\\n",
       "TH238_NAT                                   [MAP2K2 I220I, ]   \n",
       "TH225      [ EGFR R521K,  MAP2K2 I220I,  KRAS G12C,  EGFR...   \n",
       "TH146                                 [ ROS1--CD74 fusion, ]   \n",
       "TH236      [MAP2K2 R297R,  TP53 S166*, RAD21 V284F,  NFE2...   \n",
       "TH171      [TP53 P105P, NOTCH1 T1996M, SMARCA4 R521W,  MA...   \n",
       "TH210                                       [MAP2K2 I220I, ]   \n",
       "TH227         [NKX21 P155fs*43, HIF1A A612T, NFE2L2 S414N, ]   \n",
       "TH218                                   [KEAP1 V155A, , nan]   \n",
       "TH185      [PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...   \n",
       "TH222                     [nan, KEAP1 W497*, PTPRT A1266A, ]   \n",
       "TH179      [SMARCA4 P153fs*150, BRAF V600E, STK11 R106R, ...   \n",
       "TH158      [PTPN13 S1001S,  KEAP1 L471L,  STK11 V41M,  PT...   \n",
       "TH231      [KEAP1 V271M, KEAP1 R272H, RAD21 L324P, MAP2K2...   \n",
       "TH226      [ EGFR E746_A750delELREA, KEAP1 S592R,  SMARCA...   \n",
       "TH155                                                  [nan]   \n",
       "TH223            [ EGFR T903T, SMARCA4 G394W, KEAP1 M161I, ]   \n",
       "TH220      [ALK A1251D,  KEAP1 V271M, KEAP1 R483C,  ALK--...   \n",
       "TH179_NAT  [PTPN13 S1001S,  KEAP1 L471L,  SMARCA4 H508H, ...   \n",
       "TH103      [DROSHA N1255N,  PTPN13 S1001S, KEAP1 L471L,  ...   \n",
       "TH205                                        [ EGFR L858R, ]   \n",
       "TH169      [STK11 G294C,  EGFR E746_A750delELREA, KEAP1 M...   \n",
       "TH067      [MAP2K2 F338Y,  TP53 F59F, SMARCA4 R521W, TP53...   \n",
       "TH217                                                  [nan]   \n",
       "TH266      [ TP53 E180K,  ALK--EML4 fusion, BAP1 A140V,  ...   \n",
       "TH238      [KRAS Q61H, MAP2K2 R297R, NFE2L2 G81S, KEAP1 V...   \n",
       "TH248      [ SMARCA4 L6F,  EGFR K745_A750>T,  TP53 L52P, ...   \n",
       "\n",
       "                                              muts_found_WES numTumorCells  \\\n",
       "TH238_NAT                                                                3   \n",
       "TH225                                                                   21   \n",
       "TH146                                                                    3   \n",
       "TH236                                                                  107   \n",
       "TH171                [DROSHA_S321L, SPTA1_K1693Q, TP53_P72R]           634   \n",
       "TH210                 [PIK3CA_I391M, RET_G691S, ROS1_N2240K]            15   \n",
       "TH227                                                                   16   \n",
       "TH218                                                                   17   \n",
       "TH185                                                                  965   \n",
       "TH222                                                                    7   \n",
       "TH179      [BRAF_V600E, BRCA2_N372H, DROSHA_S321L, RET_G6...           133   \n",
       "TH158                                                                   11   \n",
       "TH231      [BRCA2_N372H, DROSHA_S321L, ROS1_K2228Q, ROS1_...           293   \n",
       "TH226      [EGFR_K745_A750>T, EGFR_R521K, ERBB2_I655V, RE...           201   \n",
       "TH155      [DROSHA_S321L, EGFR_K745_A750>T, EGFR_L747_T75...            17   \n",
       "TH223                                                                    6   \n",
       "TH220      [EGFR_R521K, ERBB2_I655V, ROS1_K2228Q, ROS1_S2...           132   \n",
       "TH179_NAT                                                                7   \n",
       "TH103                                                                   66   \n",
       "TH205      [BRCA2_N372H, EGFR_D1014N, EGFR_R521K, ERBB2_I...            24   \n",
       "TH169      [BRCA2_N289H, DROSHA_S321L, EGFR_K745_A750>T, ...           179   \n",
       "TH067      [BRCA2_N372H, DROSHA_S321L, EGFR_K745_A750>T, ...           101   \n",
       "TH217                                                                    5   \n",
       "TH266                                                                   28   \n",
       "TH238      [BRCA2_N372H, DROSHA_S321L, EGFR_R521K, RET_G6...           240   \n",
       "TH248                                                                  376   \n",
       "\n",
       "          numTumorCells_w_coverage_to_ROI numTumorCells_clinMut_found  \n",
       "TH238_NAT                               0                           0  \n",
       "TH225                                  15                          15  \n",
       "TH146                                   0                           3  \n",
       "TH236                                  19                          19  \n",
       "TH171                                   0                           3  \n",
       "TH210                                   0                           0  \n",
       "TH227                                   0                           0  \n",
       "TH218                                   0                           0  \n",
       "TH185                                 198                          10  \n",
       "TH222                                   0                           0  \n",
       "TH179                                   3                           3  \n",
       "TH158                                   0                           0  \n",
       "TH231                                   0                           0  \n",
       "TH226                                  10                          10  \n",
       "TH155                                   0                           0  \n",
       "TH223                                   0                           0  \n",
       "TH220                                   0                           3  \n",
       "TH179_NAT                               0                           0  \n",
       "TH103                                   0                           5  \n",
       "TH205                                   1                           1  \n",
       "TH169                                  24                          23  \n",
       "TH067                                   0                           0  \n",
       "TH217                                   0                           0  \n",
       "TH266                                   0                           2  \n",
       "TH238                                   2                           2  \n",
       "TH248                                  20                          18  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Copy the per-patient totals into patient_vali_tbl, whose index holds the\n",
    "# patient IDs. Each target column is paired with the dict holding its totals;\n",
    "# a patient that is missing from a dict keeps the value already in that column.\n",
    "totals_by_column = [\n",
    "    ('numTumorCells', numTumorCells_d),\n",
    "    ('numTumorCells_w_coverage_to_ROI', numTumorCells_cov_d),\n",
    "    ('numTumorCells_clinMut_found', numTumorCells_clinMut_d),\n",
    "]\n",
    "\n",
    "for patient_id in patient_vali_tbl.index:\n",
    "    for column, totals in totals_by_column:\n",
    "        if patient_id in totals:\n",
    "            patient_vali_tbl.at[patient_id, column] = totals[patient_id]\n",
    "\n",
    "patient_vali_tbl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 383,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the filled-in patient_vali_tbl to CSV; the relative path resolves against the current working directory.\n",
    "patient_vali_tbl.to_csv('../data_out/NI11_validation_table_tumor_exome.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
